{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "f86f05f1-4d46-4c36-90c7-278a9a77cd65",
   "metadata": {},
   "source": [
    "# Feature generation of WoE"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3f65d4a7-5877-4656-b687-db3d36d5535c",
   "metadata": {},
   "source": [
    "Weight of evidence (WoE) is a measure of how much the evidence supports or undermines a hypothesis. For a given feature, it measures the relative risk associated with each attribute value or bin. Because it originated in the credit scoring world, it is usually described as a measure of how well a bin separates good customers from bad customers."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "b68878b3-3bf2-49e4-b551-030160e4289c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('../../../')\n",
    "from python.algos.feature import WoeEncoder\n",
    "\n",
    "import pyspark.sql.functions as F\n",
    "from pyspark.ml.feature import QuantileDiscretizer\n",
    "import pandas as pd\n",
    "\n",
    "pd.set_option('display.max_rows', None)\n",
    "pd.set_option('display.max_columns', None)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8b334f0e-5a42-4bc6-8f40-d6dc4b3c7c50",
   "metadata": {},
   "source": [
    "#### Init spark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "22553d10-e614-4397-a614-12b353df4729",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/spark/.local/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n",
      "WARNING: An illegal reflective access operation has occurred\n",
      "WARNING: Illegal reflective access by org.apache.spark.unsafe.Platform (file:/opt/spark/jars/spark-unsafe_2.12-3.1.2.jar) to constructor java.nio.DirectByteBuffer(long,int)\n",
      "WARNING: Please consider reporting this to the maintainers of org.apache.spark.unsafe.Platform\n",
      "WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations\n",
      "WARNING: All illegal access operations will be denied in a future release\n",
      "22/06/10 11:18:26 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n",
      "Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties\n",
      "Setting default log level to \"WARN\".\n",
      "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
      "22/06/10 11:18:27 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.\n"
     ]
    }
   ],
   "source": [
    "import metaspore as ms\n",
    "\n",
    "spark_confs={\n",
    "        \"spark.network.timeout\":\"500\",\n",
    "        \"spark.sql.codegen.wholeStage\": \"false\"\n",
    "    }\n",
    "\n",
    "spark_session = ms.spark.get_session(local=True, app_name='Feature generation WoE Demo', spark_confs=spark_confs)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7fe6ee4a-2710-4be5-9e40-7e95fa23afbc",
   "metadata": {},
   "source": [
    "#### Load training and test datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "bd50be40-b44b-4af5-a937-cac5f3d6eee0",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "22/06/10 11:18:32 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>loanAmnt</th>\n",
       "      <th>term</th>\n",
       "      <th>interestRate</th>\n",
       "      <th>installment</th>\n",
       "      <th>grade</th>\n",
       "      <th>subGrade</th>\n",
       "      <th>employmentTitle</th>\n",
       "      <th>employmentLength</th>\n",
       "      <th>homeOwnership</th>\n",
       "      <th>annualIncome</th>\n",
       "      <th>verificationStatus</th>\n",
       "      <th>issueDate</th>\n",
       "      <th>isDefault</th>\n",
       "      <th>purpose</th>\n",
       "      <th>postCode</th>\n",
       "      <th>regionCode</th>\n",
       "      <th>dti</th>\n",
       "      <th>delinquency_2years</th>\n",
       "      <th>ficoRangeLow</th>\n",
       "      <th>ficoRangeHigh</th>\n",
       "      <th>openAcc</th>\n",
       "      <th>pubRec</th>\n",
       "      <th>pubRecBankruptcies</th>\n",
       "      <th>revolBal</th>\n",
       "      <th>revolUtil</th>\n",
       "      <th>totalAcc</th>\n",
       "      <th>initialListStatus</th>\n",
       "      <th>applicationType</th>\n",
       "      <th>earliesCreditLine</th>\n",
       "      <th>title</th>\n",
       "      <th>policyCode</th>\n",
       "      <th>n0</th>\n",
       "      <th>n1</th>\n",
       "      <th>n2</th>\n",
       "      <th>n3</th>\n",
       "      <th>n4</th>\n",
       "      <th>n5</th>\n",
       "      <th>n6</th>\n",
       "      <th>n7</th>\n",
       "      <th>n8</th>\n",
       "      <th>n9</th>\n",
       "      <th>n10</th>\n",
       "      <th>n11</th>\n",
       "      <th>n12</th>\n",
       "      <th>n13</th>\n",
       "      <th>n14</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>5</td>\n",
       "      <td>19.52</td>\n",
       "      <td>917.97</td>\n",
       "      <td>E</td>\n",
       "      <td>E2</td>\n",
       "      <td>320.0</td>\n",
       "      <td>2 years</td>\n",
       "      <td>2</td>\n",
       "      <td>110000.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2014-07-01</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>137.0</td>\n",
       "      <td>32</td>\n",
       "      <td>17.05</td>\n",
       "      <td>0.0</td>\n",
       "      <td>730.0</td>\n",
       "      <td>734.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>24178.0</td>\n",
       "      <td>48.9</td>\n",
       "      <td>27.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Aug-2001</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>18000.0</td>\n",
       "      <td>5</td>\n",
       "      <td>18.49</td>\n",
       "      <td>461.9</td>\n",
       "      <td>D</td>\n",
       "      <td>D2</td>\n",
       "      <td>219843.0</td>\n",
       "      <td>5 years</td>\n",
       "      <td>0</td>\n",
       "      <td>46000.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2012-08-01</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>156.0</td>\n",
       "      <td>18</td>\n",
       "      <td>27.83</td>\n",
       "      <td>0.0</td>\n",
       "      <td>700.0</td>\n",
       "      <td>704.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>15096.0</td>\n",
       "      <td>38.9</td>\n",
       "      <td>18.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>May-2002</td>\n",
       "      <td>1723.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>10.0</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>13.0</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>12000.0</td>\n",
       "      <td>5</td>\n",
       "      <td>16.99</td>\n",
       "      <td>298.17</td>\n",
       "      <td>D</td>\n",
       "      <td>D3</td>\n",
       "      <td>31698.0</td>\n",
       "      <td>8 years</td>\n",
       "      <td>0</td>\n",
       "      <td>74000.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2015-10-01</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>337.0</td>\n",
       "      <td>14</td>\n",
       "      <td>22.77</td>\n",
       "      <td>0.0</td>\n",
       "      <td>675.0</td>\n",
       "      <td>679.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4606.0</td>\n",
       "      <td>51.8</td>\n",
       "      <td>27.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>May-2006</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>3</td>\n",
       "      <td>7.26</td>\n",
       "      <td>340.96</td>\n",
       "      <td>A</td>\n",
       "      <td>A4</td>\n",
       "      <td>46854.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>1</td>\n",
       "      <td>118000.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2015-08-01</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>148.0</td>\n",
       "      <td>11</td>\n",
       "      <td>17.21</td>\n",
       "      <td>0.0</td>\n",
       "      <td>685.0</td>\n",
       "      <td>689.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9948.0</td>\n",
       "      <td>52.6</td>\n",
       "      <td>28.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>May-1999</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>3000.0</td>\n",
       "      <td>3</td>\n",
       "      <td>12.99</td>\n",
       "      <td>101.07</td>\n",
       "      <td>C</td>\n",
       "      <td>C2</td>\n",
       "      <td>54.0</td>\n",
       "      <td>None</td>\n",
       "      <td>1</td>\n",
       "      <td>29000.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2016-03-01</td>\n",
       "      <td>0</td>\n",
       "      <td>10</td>\n",
       "      <td>301.0</td>\n",
       "      <td>21</td>\n",
       "      <td>32.16</td>\n",
       "      <td>0.0</td>\n",
       "      <td>690.0</td>\n",
       "      <td>694.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2942.0</td>\n",
       "      <td>32.0</td>\n",
       "      <td>27.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Aug-1977</td>\n",
       "      <td>11.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>3</td>\n",
       "      <td>7.99</td>\n",
       "      <td>344.65</td>\n",
       "      <td>A</td>\n",
       "      <td>A5</td>\n",
       "      <td>51727.0</td>\n",
       "      <td>7 years</td>\n",
       "      <td>0</td>\n",
       "      <td>39000.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2017-04-01</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>512.0</td>\n",
       "      <td>21</td>\n",
       "      <td>17.14</td>\n",
       "      <td>0.0</td>\n",
       "      <td>730.0</td>\n",
       "      <td>734.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4047.0</td>\n",
       "      <td>31.1</td>\n",
       "      <td>52.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>Jul-1998</td>\n",
       "      <td>10.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>2050.0</td>\n",
       "      <td>3</td>\n",
       "      <td>7.69</td>\n",
       "      <td>63.95</td>\n",
       "      <td>A</td>\n",
       "      <td>A4</td>\n",
       "      <td>180083.0</td>\n",
       "      <td>9 years</td>\n",
       "      <td>0</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2014-10-01</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>517.0</td>\n",
       "      <td>14</td>\n",
       "      <td>17.49</td>\n",
       "      <td>0.0</td>\n",
       "      <td>755.0</td>\n",
       "      <td>759.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3111.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>23.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Oct-2006</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>11500.0</td>\n",
       "      <td>3</td>\n",
       "      <td>14.98</td>\n",
       "      <td>398.54</td>\n",
       "      <td>C</td>\n",
       "      <td>C3</td>\n",
       "      <td>214017.0</td>\n",
       "      <td>1 year</td>\n",
       "      <td>1</td>\n",
       "      <td>30000.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2014-01-01</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>4</td>\n",
       "      <td>32.6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>665.0</td>\n",
       "      <td>669.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>14021.0</td>\n",
       "      <td>59.7</td>\n",
       "      <td>33.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>Dec-1994</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8</td>\n",
       "      <td>12000.0</td>\n",
       "      <td>3</td>\n",
       "      <td>12.99</td>\n",
       "      <td>404.27</td>\n",
       "      <td>C</td>\n",
       "      <td>C2</td>\n",
       "      <td>188.0</td>\n",
       "      <td>5 years</td>\n",
       "      <td>2</td>\n",
       "      <td>60000.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2016-05-01</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>792.0</td>\n",
       "      <td>13</td>\n",
       "      <td>19.22</td>\n",
       "      <td>0.0</td>\n",
       "      <td>690.0</td>\n",
       "      <td>694.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>27176.0</td>\n",
       "      <td>46.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>Apr-1994</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>None</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>9</td>\n",
       "      <td>6500.0</td>\n",
       "      <td>3</td>\n",
       "      <td>10.99</td>\n",
       "      <td>212.78</td>\n",
       "      <td>B</td>\n",
       "      <td>B4</td>\n",
       "      <td>54.0</td>\n",
       "      <td>None</td>\n",
       "      <td>1</td>\n",
       "      <td>15300.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2015-11-01</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>11</td>\n",
       "      <td>24.39</td>\n",
       "      <td>0.0</td>\n",
       "      <td>725.0</td>\n",
       "      <td>729.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2936.0</td>\n",
       "      <td>30.6</td>\n",
       "      <td>63.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Jan-1993</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>39.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  id loanAmnt term interestRate installment grade subGrade employmentTitle  \\\n",
       "0  0  35000.0    5        19.52      917.97     E       E2           320.0   \n",
       "1  1  18000.0    5        18.49       461.9     D       D2        219843.0   \n",
       "2  2  12000.0    5        16.99      298.17     D       D3         31698.0   \n",
       "3  3  11000.0    3         7.26      340.96     A       A4         46854.0   \n",
       "4  4   3000.0    3        12.99      101.07     C       C2            54.0   \n",
       "5  5  11000.0    3         7.99      344.65     A       A5         51727.0   \n",
       "6  6   2050.0    3         7.69       63.95     A       A4        180083.0   \n",
       "7  7  11500.0    3        14.98      398.54     C       C3        214017.0   \n",
       "8  8  12000.0    3        12.99      404.27     C       C2           188.0   \n",
       "9  9   6500.0    3        10.99      212.78     B       B4            54.0   \n",
       "\n",
       "  employmentLength homeOwnership annualIncome verificationStatus   issueDate  \\\n",
       "0          2 years             2     110000.0                  2  2014-07-01   \n",
       "1          5 years             0      46000.0                  2  2012-08-01   \n",
       "2          8 years             0      74000.0                  2  2015-10-01   \n",
       "3        10+ years             1     118000.0                  1  2015-08-01   \n",
       "4             None             1      29000.0                  2  2016-03-01   \n",
       "5          7 years             0      39000.0                  2  2017-04-01   \n",
       "6          9 years             0      35000.0                  0  2014-10-01   \n",
       "7           1 year             1      30000.0                  2  2014-01-01   \n",
       "8          5 years             2      60000.0                  1  2016-05-01   \n",
       "9             None             1      15300.0                  2  2015-11-01   \n",
       "\n",
       "  isDefault purpose postCode regionCode    dti delinquency_2years  \\\n",
       "0         1       1    137.0         32  17.05                0.0   \n",
       "1         0       0    156.0         18  27.83                0.0   \n",
       "2         0       0    337.0         14  22.77                0.0   \n",
       "3         0       4    148.0         11  17.21                0.0   \n",
       "4         0      10    301.0         21  32.16                0.0   \n",
       "5         0       9    512.0         21  17.14                0.0   \n",
       "6         0       0    517.0         14  17.49                0.0   \n",
       "7         0       0    100.0          4   32.6                0.0   \n",
       "8         1       0    792.0         13  19.22                0.0   \n",
       "9         0       0     59.0         11  24.39                0.0   \n",
       "\n",
       "  ficoRangeLow ficoRangeHigh openAcc pubRec pubRecBankruptcies revolBal  \\\n",
       "0        730.0         734.0     7.0    0.0                0.0  24178.0   \n",
       "1        700.0         704.0    13.0    0.0                0.0  15096.0   \n",
       "2        675.0         679.0    11.0    0.0                0.0   4606.0   \n",
       "3        685.0         689.0     9.0    0.0                0.0   9948.0   \n",
       "4        690.0         694.0    12.0    0.0                0.0   2942.0   \n",
       "5        730.0         734.0    19.0    0.0                0.0   4047.0   \n",
       "6        755.0         759.0    12.0    0.0                0.0   3111.0   \n",
       "7        665.0         669.0     8.0    1.0                1.0  14021.0   \n",
       "8        690.0         694.0    15.0    0.0                0.0  27176.0   \n",
       "9        725.0         729.0     7.0    0.0                0.0   2936.0   \n",
       "\n",
       "  revolUtil totalAcc initialListStatus applicationType earliesCreditLine  \\\n",
       "0      48.9     27.0                 0               0          Aug-2001   \n",
       "1      38.9     18.0                 1               0          May-2002   \n",
       "2      51.8     27.0                 0               0          May-2006   \n",
       "3      52.6     28.0                 1               0          May-1999   \n",
       "4      32.0     27.0                 0               0          Aug-1977   \n",
       "5      31.1     52.0                 1               0          Jul-1998   \n",
       "6       8.5     23.0                 0               0          Oct-2006   \n",
       "7      59.7     33.0                 1               0          Dec-1994   \n",
       "8      46.0     21.0                 1               0          Apr-1994   \n",
       "9      30.6     63.0                 0               0          Jan-1993   \n",
       "\n",
       "    title policyCode    n0    n1    n2    n3    n4    n5    n6    n7    n8  \\\n",
       "0     1.0        1.0   0.0   2.0   2.0   2.0   4.0   9.0   8.0   4.0  12.0   \n",
       "1  1723.0        1.0  None  None  None  None  10.0  None  None  None  None   \n",
       "2     0.0        1.0   0.0   0.0   3.0   3.0   0.0   0.0  21.0   4.0   5.0   \n",
       "3     4.0        1.0   6.0   4.0   6.0   6.0   4.0  16.0   4.0   7.0  21.0   \n",
       "4    11.0        1.0   1.0   2.0   7.0   7.0   2.0   4.0   9.0  10.0  15.0   \n",
       "5    10.0        1.0  12.0   1.0   2.0   2.0   1.0   1.0  48.0   2.0   3.0   \n",
       "6     0.0        1.0   0.0   1.0   3.0   3.0   7.0  11.0   3.0  10.0  18.0   \n",
       "7     0.0        1.0   0.0   4.0   4.0   4.0   4.0  16.0  10.0   5.0  21.0   \n",
       "8     0.0        1.0   0.0   7.0  13.0  13.0   7.0   7.0   2.0  13.0  17.0   \n",
       "9     0.0        1.0   0.0   4.0   5.0   5.0   4.0  21.0  24.0   6.0  39.0   \n",
       "\n",
       "     n9   n10   n11   n12   n13   n14  \n",
       "0   2.0   7.0   0.0   0.0   0.0   2.0  \n",
       "1  None  13.0  None  None  None  None  \n",
       "2   3.0  11.0   0.0   0.0   0.0   4.0  \n",
       "3   6.0   9.0   0.0   0.0   0.0   1.0  \n",
       "4   7.0  12.0   0.0   0.0   0.0   4.0  \n",
       "5   2.0  19.0   0.0   0.0   0.0   0.0  \n",
       "6   3.0  12.0   0.0   0.0   0.0   3.0  \n",
       "7   4.0   8.0   0.0   0.0   0.0   2.0  \n",
       "8  11.0  15.0  None   0.0   0.0   6.0  \n",
       "9   5.0   7.0   0.0   0.0   0.0   8.0  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_dataset = spark_session.read.csv('s3://dmetasoul-bucket/demo/risk/tianchi/train.csv', header=True, inferSchema=False)\n",
    "test_dataset = spark_session.read.csv('s3://dmetasoul-bucket/demo/risk/tianchi/testA.csv', header=True, inferSchema=False)\n",
    "\n",
    "train_dataset.limit(10).toPandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "50b6e851-0cf4-40a5-9fe7-cf87c0ce1137",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>loanAmnt</th>\n",
       "      <th>term</th>\n",
       "      <th>interestRate</th>\n",
       "      <th>installment</th>\n",
       "      <th>grade</th>\n",
       "      <th>subGrade</th>\n",
       "      <th>employmentTitle</th>\n",
       "      <th>employmentLength</th>\n",
       "      <th>homeOwnership</th>\n",
       "      <th>annualIncome</th>\n",
       "      <th>verificationStatus</th>\n",
       "      <th>issueDate</th>\n",
       "      <th>purpose</th>\n",
       "      <th>postCode</th>\n",
       "      <th>regionCode</th>\n",
       "      <th>dti</th>\n",
       "      <th>delinquency_2years</th>\n",
       "      <th>ficoRangeLow</th>\n",
       "      <th>ficoRangeHigh</th>\n",
       "      <th>openAcc</th>\n",
       "      <th>pubRec</th>\n",
       "      <th>pubRecBankruptcies</th>\n",
       "      <th>revolBal</th>\n",
       "      <th>revolUtil</th>\n",
       "      <th>totalAcc</th>\n",
       "      <th>initialListStatus</th>\n",
       "      <th>applicationType</th>\n",
       "      <th>earliesCreditLine</th>\n",
       "      <th>title</th>\n",
       "      <th>policyCode</th>\n",
       "      <th>n0</th>\n",
       "      <th>n1</th>\n",
       "      <th>n2</th>\n",
       "      <th>n3</th>\n",
       "      <th>n4</th>\n",
       "      <th>n5</th>\n",
       "      <th>n6</th>\n",
       "      <th>n7</th>\n",
       "      <th>n8</th>\n",
       "      <th>n9</th>\n",
       "      <th>n10</th>\n",
       "      <th>n11</th>\n",
       "      <th>n12</th>\n",
       "      <th>n13</th>\n",
       "      <th>n14</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>800000</td>\n",
       "      <td>14000.0</td>\n",
       "      <td>3</td>\n",
       "      <td>10.99</td>\n",
       "      <td>458.28</td>\n",
       "      <td>B</td>\n",
       "      <td>B3</td>\n",
       "      <td>7027.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>0</td>\n",
       "      <td>80000.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2014-07-01</td>\n",
       "      <td>0</td>\n",
       "      <td>163.0</td>\n",
       "      <td>21</td>\n",
       "      <td>10.56</td>\n",
       "      <td>1.0</td>\n",
       "      <td>715.0</td>\n",
       "      <td>719.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9846.0</td>\n",
       "      <td>30.7</td>\n",
       "      <td>29.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Nov-1974</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>800001</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>5</td>\n",
       "      <td>14.65</td>\n",
       "      <td>472.14</td>\n",
       "      <td>C</td>\n",
       "      <td>C5</td>\n",
       "      <td>60426.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>0</td>\n",
       "      <td>50000.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2015-07-01</td>\n",
       "      <td>2</td>\n",
       "      <td>235.0</td>\n",
       "      <td>8</td>\n",
       "      <td>21.4</td>\n",
       "      <td>2.0</td>\n",
       "      <td>670.0</td>\n",
       "      <td>674.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8946.0</td>\n",
       "      <td>56.6</td>\n",
       "      <td>14.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Jul-2001</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>800002</td>\n",
       "      <td>12000.0</td>\n",
       "      <td>3</td>\n",
       "      <td>19.99</td>\n",
       "      <td>445.91</td>\n",
       "      <td>D</td>\n",
       "      <td>D4</td>\n",
       "      <td>23547.0</td>\n",
       "      <td>2 years</td>\n",
       "      <td>1</td>\n",
       "      <td>60000.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2016-10-01</td>\n",
       "      <td>0</td>\n",
       "      <td>526.0</td>\n",
       "      <td>20</td>\n",
       "      <td>33.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>710.0</td>\n",
       "      <td>714.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>970.0</td>\n",
       "      <td>17.6</td>\n",
       "      <td>43.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>Aug-2006</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>800003</td>\n",
       "      <td>17500.0</td>\n",
       "      <td>5</td>\n",
       "      <td>14.31</td>\n",
       "      <td>410.02</td>\n",
       "      <td>C</td>\n",
       "      <td>C4</td>\n",
       "      <td>636.0</td>\n",
       "      <td>4 years</td>\n",
       "      <td>0</td>\n",
       "      <td>37000.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2014-11-01</td>\n",
       "      <td>4</td>\n",
       "      <td>248.0</td>\n",
       "      <td>11</td>\n",
       "      <td>13.95</td>\n",
       "      <td>0.0</td>\n",
       "      <td>685.0</td>\n",
       "      <td>689.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>10249.0</td>\n",
       "      <td>52.3</td>\n",
       "      <td>18.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Jul-2002</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>800004</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>3</td>\n",
       "      <td>17.09</td>\n",
       "      <td>1249.42</td>\n",
       "      <td>D</td>\n",
       "      <td>D1</td>\n",
       "      <td>368446.0</td>\n",
       "      <td>&lt; 1 year</td>\n",
       "      <td>1</td>\n",
       "      <td>80000.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2017-10-01</td>\n",
       "      <td>0</td>\n",
       "      <td>115.0</td>\n",
       "      <td>8</td>\n",
       "      <td>24.97</td>\n",
       "      <td>0.0</td>\n",
       "      <td>685.0</td>\n",
       "      <td>689.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>33199.0</td>\n",
       "      <td>35.6</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Dec-2000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>800005</td>\n",
       "      <td>16000.0</td>\n",
       "      <td>3</td>\n",
       "      <td>5.32</td>\n",
       "      <td>481.84</td>\n",
       "      <td>A</td>\n",
       "      <td>A1</td>\n",
       "      <td>236.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>0</td>\n",
       "      <td>90000.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2017-05-01</td>\n",
       "      <td>0</td>\n",
       "      <td>480.0</td>\n",
       "      <td>8</td>\n",
       "      <td>15.28</td>\n",
       "      <td>0.0</td>\n",
       "      <td>775.0</td>\n",
       "      <td>779.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6207.0</td>\n",
       "      <td>12.1</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Jul-2000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>25.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>800006</td>\n",
       "      <td>25000.0</td>\n",
       "      <td>5</td>\n",
       "      <td>14.99</td>\n",
       "      <td>594.62</td>\n",
       "      <td>C</td>\n",
       "      <td>C4</td>\n",
       "      <td>13.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>1</td>\n",
       "      <td>126500.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2016-11-01</td>\n",
       "      <td>0</td>\n",
       "      <td>310.0</td>\n",
       "      <td>24</td>\n",
       "      <td>22.94</td>\n",
       "      <td>0.0</td>\n",
       "      <td>720.0</td>\n",
       "      <td>724.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>31465.0</td>\n",
       "      <td>40.3</td>\n",
       "      <td>25.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Jan-1998</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>800007</td>\n",
       "      <td>4500.0</td>\n",
       "      <td>3</td>\n",
       "      <td>7.35</td>\n",
       "      <td>139.67</td>\n",
       "      <td>A</td>\n",
       "      <td>A4</td>\n",
       "      <td>3403.0</td>\n",
       "      <td>6 years</td>\n",
       "      <td>0</td>\n",
       "      <td>50000.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2017-10-01</td>\n",
       "      <td>4</td>\n",
       "      <td>81.0</td>\n",
       "      <td>15</td>\n",
       "      <td>13.32</td>\n",
       "      <td>0.0</td>\n",
       "      <td>750.0</td>\n",
       "      <td>754.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5200.0</td>\n",
       "      <td>22.1</td>\n",
       "      <td>17.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Feb-2007</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>800008</td>\n",
       "      <td>12000.0</td>\n",
       "      <td>5</td>\n",
       "      <td>16.55</td>\n",
       "      <td>295.34</td>\n",
       "      <td>D</td>\n",
       "      <td>D2</td>\n",
       "      <td>346.0</td>\n",
       "      <td>3 years</td>\n",
       "      <td>1</td>\n",
       "      <td>58000.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2015-10-01</td>\n",
       "      <td>0</td>\n",
       "      <td>138.0</td>\n",
       "      <td>0</td>\n",
       "      <td>23.17</td>\n",
       "      <td>2.0</td>\n",
       "      <td>680.0</td>\n",
       "      <td>684.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1382.0</td>\n",
       "      <td>18.2</td>\n",
       "      <td>29.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>May-2004</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>800009</td>\n",
       "      <td>16500.0</td>\n",
       "      <td>3</td>\n",
       "      <td>7.69</td>\n",
       "      <td>514.7</td>\n",
       "      <td>A</td>\n",
       "      <td>A4</td>\n",
       "      <td>8103.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>0</td>\n",
       "      <td>140000.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2014-08-01</td>\n",
       "      <td>4</td>\n",
       "      <td>76.0</td>\n",
       "      <td>26</td>\n",
       "      <td>13.93</td>\n",
       "      <td>0.0</td>\n",
       "      <td>715.0</td>\n",
       "      <td>719.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>16924.0</td>\n",
       "      <td>75.6</td>\n",
       "      <td>28.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Sep-1992</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       id loanAmnt term interestRate installment grade subGrade  \\\n",
       "0  800000  14000.0    3        10.99      458.28     B       B3   \n",
       "1  800001  20000.0    5        14.65      472.14     C       C5   \n",
       "2  800002  12000.0    3        19.99      445.91     D       D4   \n",
       "3  800003  17500.0    5        14.31      410.02     C       C4   \n",
       "4  800004  35000.0    3        17.09     1249.42     D       D1   \n",
       "5  800005  16000.0    3         5.32      481.84     A       A1   \n",
       "6  800006  25000.0    5        14.99      594.62     C       C4   \n",
       "7  800007   4500.0    3         7.35      139.67     A       A4   \n",
       "8  800008  12000.0    5        16.55      295.34     D       D2   \n",
       "9  800009  16500.0    3         7.69       514.7     A       A4   \n",
       "\n",
       "  employmentTitle employmentLength homeOwnership annualIncome  \\\n",
       "0          7027.0        10+ years             0      80000.0   \n",
       "1         60426.0        10+ years             0      50000.0   \n",
       "2         23547.0          2 years             1      60000.0   \n",
       "3           636.0          4 years             0      37000.0   \n",
       "4        368446.0         < 1 year             1      80000.0   \n",
       "5           236.0        10+ years             0      90000.0   \n",
       "6            13.0        10+ years             1     126500.0   \n",
       "7          3403.0          6 years             0      50000.0   \n",
       "8           346.0          3 years             1      58000.0   \n",
       "9          8103.0        10+ years             0     140000.0   \n",
       "\n",
       "  verificationStatus   issueDate purpose postCode regionCode    dti  \\\n",
       "0                  0  2014-07-01       0    163.0         21  10.56   \n",
       "1                  0  2015-07-01       2    235.0          8   21.4   \n",
       "2                  2  2016-10-01       0    526.0         20   33.5   \n",
       "3                  1  2014-11-01       4    248.0         11  13.95   \n",
       "4                  1  2017-10-01       0    115.0          8  24.97   \n",
       "5                  0  2017-05-01       0    480.0          8  15.28   \n",
       "6                  2  2016-11-01       0    310.0         24  22.94   \n",
       "7                  0  2017-10-01       4     81.0         15  13.32   \n",
       "8                  1  2015-10-01       0    138.0          0  23.17   \n",
       "9                  0  2014-08-01       4     76.0         26  13.93   \n",
       "\n",
       "  delinquency_2years ficoRangeLow ficoRangeHigh openAcc pubRec  \\\n",
       "0                1.0        715.0         719.0    17.0    0.0   \n",
       "1                2.0        670.0         674.0     5.0    0.0   \n",
       "2                0.0        710.0         714.0    12.0    0.0   \n",
       "3                0.0        685.0         689.0    10.0    1.0   \n",
       "4                0.0        685.0         689.0    19.0    0.0   \n",
       "5                0.0        775.0         779.0    17.0    0.0   \n",
       "6                0.0        720.0         724.0    14.0    0.0   \n",
       "7                0.0        750.0         754.0     8.0    0.0   \n",
       "8                2.0        680.0         684.0    14.0    0.0   \n",
       "9                0.0        715.0         719.0    10.0    0.0   \n",
       "\n",
       "  pubRecBankruptcies revolBal revolUtil totalAcc initialListStatus  \\\n",
       "0                0.0   9846.0      30.7     29.0                 0   \n",
       "1                0.0   8946.0      56.6     14.0                 0   \n",
       "2                0.0    970.0      17.6     43.0                 1   \n",
       "3                1.0  10249.0      52.3     18.0                 0   \n",
       "4                0.0  33199.0      35.6     22.0                 0   \n",
       "5                0.0   6207.0      12.1     35.0                 0   \n",
       "6                0.0  31465.0      40.3     25.0                 0   \n",
       "7                0.0   5200.0      22.1     17.0                 1   \n",
       "8                0.0   1382.0      18.2     29.0                 0   \n",
       "9                0.0  16924.0      75.6     28.0                 0   \n",
       "\n",
       "  applicationType earliesCreditLine title policyCode   n0   n1    n2    n3  \\\n",
       "0               0          Nov-1974   0.0        1.0  1.0  4.0   6.0   6.0   \n",
       "1               0          Jul-2001   5.0        1.0  2.0  1.0   3.0   3.0   \n",
       "2               0          Aug-2006   0.0        1.0  0.0  1.0   4.0   4.0   \n",
       "3               0          Jul-2002   4.0        1.0  0.0  2.0   2.0   2.0   \n",
       "4               0          Dec-2000   0.0        1.0  0.0  8.0  11.0  11.0   \n",
       "5               0          Jul-2000   0.0        1.0  0.0  3.0   5.0   5.0   \n",
       "6               0          Jan-1998   0.0        1.0  0.0  5.0   7.0   7.0   \n",
       "7               1          Feb-2007   4.0        1.0  0.0  3.0   3.0   3.0   \n",
       "8               0          May-2004   0.0        1.0  1.0  4.0   4.0   4.0   \n",
       "9               0          Sep-1992   4.0        1.0  2.0  2.0   4.0   4.0   \n",
       "\n",
       "    n4    n5    n6    n7    n8    n9   n10  n11  n12  n13  n14  \n",
       "0  6.0   8.0   4.0  15.0  19.0   6.0  17.0  0.0  0.0  1.0  3.0  \n",
       "1  1.0   1.0   3.0   3.0   9.0   3.0   5.0  0.0  0.0  2.0  2.0  \n",
       "2  1.0   1.0  36.0   5.0   6.0   4.0  12.0  0.0  0.0  0.0  7.0  \n",
       "3  4.0   7.0   2.0   8.0  14.0   2.0  10.0  0.0  0.0  0.0  3.0  \n",
       "4  9.0  11.0   3.0  16.0  18.0  11.0  19.0  0.0  0.0  0.0  1.0  \n",
       "5  8.0  14.0   6.0  13.0  25.0   5.0  17.0  0.0  0.0  0.0  3.0  \n",
       "6  6.0   6.0   6.0  10.0  17.0   7.0  14.0  0.0  0.0  0.0  1.0  \n",
       "7  4.0   6.0   8.0   4.0   8.0   3.0   8.0  0.0  0.0  0.0  1.0  \n",
       "8  4.0   5.0  21.0   5.0   8.0   4.0  14.0  0.0  0.0  0.0  4.0  \n",
       "9  2.0   7.0  10.0   4.0  13.0   4.0  10.0  0.0  0.0  0.0  3.0  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_dataset.limit(10).toPandas()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6714dcbf-437d-4a9d-8c89-cb3d959dbce8",
   "metadata": {},
   "source": [
    "#### Define columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "4c6997ef-479f-4cd4-aced-0d9cfe08a7f6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Target column and row identifier: both are excluded from WoE encoding.\n",
    "label_colunms = ['isDefault']\n",
    "special_colunms = ['id']\n",
    "\n",
    "# Features handled as continuous values.\n",
    "continuous_colunms = [\n",
    "    'loanAmnt', 'interestRate', 'installment', 'employmentTitle', 'annualIncome',\n",
    "    'postCode', 'dti', 'delinquency_2years', 'ficoRangeLow', 'ficoRangeHigh',\n",
    "    'openAcc', 'pubRec', 'pubRecBankruptcies', 'revolBal', 'revolUtil',\n",
    "    'totalAcc', 'title', 'policyCode',\n",
    "]\n",
    "\n",
    "# Everything except the label and identifier is encoded; of those, whatever is\n",
    "# not listed as continuous is categorical. Dataset column order is kept.\n",
    "excluded_names = set(label_colunms + special_colunms)\n",
    "continuous_names = set(continuous_colunms)\n",
    "colunms_to_woe = [name for name in train_dataset.columns if name not in excluded_names]\n",
    "categorical_colunms = [name for name in colunms_to_woe if name not in continuous_names]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "89086c6b-3613-4967-b12a-43d3fe067aae",
   "metadata": {},
   "source": [
    "#### Handle missing data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "a6ca8b82-1b48-4bd8-b14d-8758989bf59f",
   "metadata": {},
   "outputs": [],
   "source": [
    "def fill_missing_and_cast(dataset):\n",
    "    \"\"\"Replace nulls with placeholders, then cast continuous and label columns to double.\n",
    "\n",
    "    Nulls in the continuous columns become '0.0' and nulls in the categorical\n",
    "    columns become 'Empty'. All other columns are passed through unchanged.\n",
    "    \"\"\"\n",
    "    # NOTE(review): a string fill value only affects string-typed columns;\n",
    "    # presumably every column was loaded as a string - confirm in the loading cell.\n",
    "    filled = (\n",
    "        dataset.na.fill('0.0', subset=continuous_colunms)\n",
    "               .na.fill('Empty', subset=categorical_colunms)\n",
    "    )\n",
    "    double_names = set(continuous_colunms + label_colunms)\n",
    "    projection = [\n",
    "        F.col(name).cast('double').alias(name) if name in double_names else F.col(name)\n",
    "        for name in filled.columns\n",
    "    ]\n",
    "    return filled.select(*projection)\n",
    "\n",
    "\n",
    "# Apply the same preparation to both splits so their schemas stay aligned.\n",
    "train_dataset = fill_missing_and_cast(train_dataset)\n",
    "test_dataset = fill_missing_and_cast(test_dataset)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f032e0b4-8432-464f-83a1-c4e37efb4887",
   "metadata": {},
   "source": [
    "#### Convert continuous features to categorical"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "10c5c920-30d9-4c2b-8f4c-2ecedde05f45",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "22/06/10 11:18:40 WARN QuantileDiscretizer: Some quantiles were identical. Bucketing to 3 buckets as a result.\n",
      "22/06/10 11:18:40 WARN QuantileDiscretizer: Some quantiles were identical. Bucketing to 3 buckets as a result.\n",
      "22/06/10 11:18:40 WARN QuantileDiscretizer: Some quantiles were identical. Bucketing to 3 buckets as a result.\n",
      "22/06/10 11:18:40 WARN QuantileDiscretizer: Some quantiles were identical. Bucketing to 6 buckets as a result.\n",
      "22/06/10 11:18:40 WARN QuantileDiscretizer: Some quantiles were identical. Bucketing to 2 buckets as a result.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>loanAmnt</th>\n",
       "      <th>term</th>\n",
       "      <th>interestRate</th>\n",
       "      <th>installment</th>\n",
       "      <th>grade</th>\n",
       "      <th>subGrade</th>\n",
       "      <th>employmentTitle</th>\n",
       "      <th>employmentLength</th>\n",
       "      <th>homeOwnership</th>\n",
       "      <th>annualIncome</th>\n",
       "      <th>verificationStatus</th>\n",
       "      <th>issueDate</th>\n",
       "      <th>isDefault</th>\n",
       "      <th>purpose</th>\n",
       "      <th>postCode</th>\n",
       "      <th>regionCode</th>\n",
       "      <th>dti</th>\n",
       "      <th>delinquency_2years</th>\n",
       "      <th>ficoRangeLow</th>\n",
       "      <th>ficoRangeHigh</th>\n",
       "      <th>openAcc</th>\n",
       "      <th>pubRec</th>\n",
       "      <th>pubRecBankruptcies</th>\n",
       "      <th>revolBal</th>\n",
       "      <th>revolUtil</th>\n",
       "      <th>totalAcc</th>\n",
       "      <th>initialListStatus</th>\n",
       "      <th>applicationType</th>\n",
       "      <th>earliesCreditLine</th>\n",
       "      <th>title</th>\n",
       "      <th>policyCode</th>\n",
       "      <th>n0</th>\n",
       "      <th>n1</th>\n",
       "      <th>n2</th>\n",
       "      <th>n3</th>\n",
       "      <th>n4</th>\n",
       "      <th>n5</th>\n",
       "      <th>n6</th>\n",
       "      <th>n7</th>\n",
       "      <th>n8</th>\n",
       "      <th>n9</th>\n",
       "      <th>n10</th>\n",
       "      <th>n11</th>\n",
       "      <th>n12</th>\n",
       "      <th>n13</th>\n",
       "      <th>n14</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>5</td>\n",
       "      <td>9.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>E</td>\n",
       "      <td>E2</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2 years</td>\n",
       "      <td>2</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2014-07-01</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3.0</td>\n",
       "      <td>32</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Aug-2001</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>7.0</td>\n",
       "      <td>5</td>\n",
       "      <td>8.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>D</td>\n",
       "      <td>D2</td>\n",
       "      <td>8.0</td>\n",
       "      <td>5 years</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2012-08-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>18</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>May-2002</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Empty</td>\n",
       "      <td>Empty</td>\n",
       "      <td>Empty</td>\n",
       "      <td>Empty</td>\n",
       "      <td>10.0</td>\n",
       "      <td>Empty</td>\n",
       "      <td>Empty</td>\n",
       "      <td>Empty</td>\n",
       "      <td>Empty</td>\n",
       "      <td>Empty</td>\n",
       "      <td>13.0</td>\n",
       "      <td>Empty</td>\n",
       "      <td>Empty</td>\n",
       "      <td>Empty</td>\n",
       "      <td>Empty</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5</td>\n",
       "      <td>8.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>D</td>\n",
       "      <td>D3</td>\n",
       "      <td>6.0</td>\n",
       "      <td>8 years</td>\n",
       "      <td>0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2015-10-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>14</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>May-2006</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>A</td>\n",
       "      <td>A4</td>\n",
       "      <td>6.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>1</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2015-08-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4</td>\n",
       "      <td>3.0</td>\n",
       "      <td>11</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>May-1999</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>C</td>\n",
       "      <td>C2</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Empty</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2016-03-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10</td>\n",
       "      <td>6.0</td>\n",
       "      <td>21</td>\n",
       "      <td>9.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Aug-1977</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>A</td>\n",
       "      <td>A5</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7 years</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2017-04-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9</td>\n",
       "      <td>8.0</td>\n",
       "      <td>21</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>Jul-1998</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>A</td>\n",
       "      <td>A4</td>\n",
       "      <td>8.0</td>\n",
       "      <td>9 years</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2014-10-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>14</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Oct-2006</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3</td>\n",
       "      <td>6.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>C</td>\n",
       "      <td>C3</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1 year</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2014-01-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4</td>\n",
       "      <td>9.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>Dec-1994</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>C</td>\n",
       "      <td>C2</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5 years</td>\n",
       "      <td>2</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2016-05-01</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>13</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>Apr-1994</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>Empty</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>9</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>B</td>\n",
       "      <td>B4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Empty</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2015-11-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>11</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Jan-1993</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>39.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  id  loanAmnt term  interestRate  installment grade subGrade  \\\n",
       "0  0       9.0    5           9.0          9.0     E       E2   \n",
       "1  1       7.0    5           8.0          6.0     D       D2   \n",
       "2  2       5.0    5           8.0          3.0     D       D3   \n",
       "3  3       4.0    3           0.0          4.0     A       A4   \n",
       "4  4       0.0    3           5.0          0.0     C       C2   \n",
       "5  5       4.0    3           1.0          4.0     A       A5   \n",
       "6  6       0.0    3           1.0          0.0     A       A4   \n",
       "7  7       4.0    3           6.0          5.0     C       C3   \n",
       "8  8       5.0    3           5.0          5.0     C       C2   \n",
       "9  9       2.0    3           3.0          1.0     B       B4   \n",
       "\n",
       "   employmentTitle employmentLength homeOwnership  annualIncome  \\\n",
       "0              2.0          2 years             2           8.0   \n",
       "1              8.0          5 years             0           2.0   \n",
       "2              6.0          8 years             0           6.0   \n",
       "3              6.0        10+ years             1           8.0   \n",
       "4              1.0            Empty             1           0.0   \n",
       "5              6.0          7 years             0           1.0   \n",
       "6              8.0          9 years             0           1.0   \n",
       "7              8.0           1 year             1           0.0   \n",
       "8              1.0          5 years             2           4.0   \n",
       "9              1.0            Empty             1           0.0   \n",
       "\n",
       "  verificationStatus   issueDate  isDefault purpose  postCode regionCode  dti  \\\n",
       "0                  2  2014-07-01        1.0       1       3.0         32  4.0   \n",
       "1                  2  2012-08-01        0.0       0       3.0         18  8.0   \n",
       "2                  2  2015-10-01        0.0       0       6.0         14  7.0   \n",
       "3                  1  2015-08-01        0.0       4       3.0         11  4.0   \n",
       "4                  2  2016-03-01        0.0      10       6.0         21  9.0   \n",
       "5                  2  2017-04-01        0.0       9       8.0         21  4.0   \n",
       "6                  0  2014-10-01        0.0       0       8.0         14  4.0   \n",
       "7                  2  2014-01-01        0.0       0       2.0          4  9.0   \n",
       "8                  1  2016-05-01        1.0       0       9.0         13  5.0   \n",
       "9                  2  2015-11-01        0.0       0       1.0         11  7.0   \n",
       "\n",
       "   delinquency_2years  ficoRangeLow  ficoRangeHigh  openAcc  pubRec  \\\n",
       "0                 1.0           8.0            8.0      2.0     1.0   \n",
       "1                 1.0           6.0            6.0      7.0     1.0   \n",
       "2                 1.0           3.0            3.0      5.0     1.0   \n",
       "3                 1.0           4.0            4.0      4.0     1.0   \n",
       "4                 1.0           5.0            5.0      6.0     1.0   \n",
       "5                 1.0           8.0            8.0      9.0     1.0   \n",
       "6                 1.0           9.0            9.0      6.0     1.0   \n",
       "7                 1.0           1.0            1.0      3.0     2.0   \n",
       "8                 1.0           5.0            5.0      8.0     1.0   \n",
       "9                 1.0           8.0            8.0      2.0     1.0   \n",
       "\n",
       "   pubRecBankruptcies  revolBal  revolUtil  totalAcc initialListStatus  \\\n",
       "0                 1.0       8.0        4.0       6.0                 0   \n",
       "1                 1.0       6.0        3.0       3.0                 1   \n",
       "2                 1.0       1.0        4.0       6.0                 0   \n",
       "3                 1.0       4.0        5.0       6.0                 1   \n",
       "4                 1.0       0.0        2.0       6.0                 0   \n",
       "5                 1.0       1.0        2.0       9.0                 1   \n",
       "6                 1.0       1.0        0.0       5.0                 0   \n",
       "7                 2.0       6.0        6.0       7.0                 1   \n",
       "8                 1.0       8.0        4.0       4.0                 1   \n",
       "9                 1.0       0.0        2.0       9.0                 0   \n",
       "\n",
       "  applicationType earliesCreditLine  title  policyCode     n0     n1     n2  \\\n",
       "0               0          Aug-2001    1.0         1.0    0.0    2.0    2.0   \n",
       "1               0          May-2002    5.0         1.0  Empty  Empty  Empty   \n",
       "2               0          May-2006    1.0         1.0    0.0    0.0    3.0   \n",
       "3               0          May-1999    3.0         1.0    6.0    4.0    6.0   \n",
       "4               0          Aug-1977    4.0         1.0    1.0    2.0    7.0   \n",
       "5               0          Jul-1998    4.0         1.0   12.0    1.0    2.0   \n",
       "6               0          Oct-2006    1.0         1.0    0.0    1.0    3.0   \n",
       "7               0          Dec-1994    1.0         1.0    0.0    4.0    4.0   \n",
       "8               0          Apr-1994    1.0         1.0    0.0    7.0   13.0   \n",
       "9               0          Jan-1993    1.0         1.0    0.0    4.0    5.0   \n",
       "\n",
       "      n3    n4     n5     n6     n7     n8     n9   n10    n11    n12    n13  \\\n",
       "0    2.0   4.0    9.0    8.0    4.0   12.0    2.0   7.0    0.0    0.0    0.0   \n",
       "1  Empty  10.0  Empty  Empty  Empty  Empty  Empty  13.0  Empty  Empty  Empty   \n",
       "2    3.0   0.0    0.0   21.0    4.0    5.0    3.0  11.0    0.0    0.0    0.0   \n",
       "3    6.0   4.0   16.0    4.0    7.0   21.0    6.0   9.0    0.0    0.0    0.0   \n",
       "4    7.0   2.0    4.0    9.0   10.0   15.0    7.0  12.0    0.0    0.0    0.0   \n",
       "5    2.0   1.0    1.0   48.0    2.0    3.0    2.0  19.0    0.0    0.0    0.0   \n",
       "6    3.0   7.0   11.0    3.0   10.0   18.0    3.0  12.0    0.0    0.0    0.0   \n",
       "7    4.0   4.0   16.0   10.0    5.0   21.0    4.0   8.0    0.0    0.0    0.0   \n",
       "8   13.0   7.0    7.0    2.0   13.0   17.0   11.0  15.0  Empty    0.0    0.0   \n",
       "9    5.0   4.0   21.0   24.0    6.0   39.0    5.0   7.0    0.0    0.0    0.0   \n",
       "\n",
       "     n14  \n",
       "0    2.0  \n",
       "1  Empty  \n",
       "2    4.0  \n",
       "3    1.0  \n",
       "4    4.0  \n",
       "5    0.0  \n",
       "6    3.0  \n",
       "7    2.0  \n",
       "8    6.0  \n",
       "9    8.0  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Replace every continuous feature with its quantile-bucket index (10 buckets requested),\n",
    "# keeping the original column names and the original column order.\n",
    "output_suffix = '_bucket'\n",
    "temp_cols = [ic + output_suffix for ic in continuous_colunms]\n",
    "\n",
    "# NOTE(review): handleInvalid='skip' filters out rows holding an invalid value (e.g. NaN)\n",
    "# in any of the input columns, so the bucketed frame can have fewer rows than\n",
    "# train_dataset - confirm this is intended.\n",
    "# `discretizer` ends up bound to the fitted model, exactly as before.\n",
    "discretizer = QuantileDiscretizer(\n",
    "    numBuckets=10,\n",
    "    inputCols=continuous_colunms,\n",
    "    outputCols=temp_cols,\n",
    "    handleInvalid='skip',\n",
    ").fit(train_dataset)\n",
    "\n",
    "# Explicit source -> bucket column mapping. The previous str.replace() based rename removed\n",
    "# every occurrence of the suffix, which corrupts names that already contain '_bucket'.\n",
    "bucket_col_of = dict(zip(continuous_colunms, temp_cols))\n",
    "\n",
    "# Single projection: take the bucket column under the source name where one exists,\n",
    "# pass every other column through unchanged, in train_dataset's column order.\n",
    "train_df_bucketed = discretizer.transform(train_dataset).select(\n",
    "    *(\n",
    "        F.col(bucket_col_of[c]).alias(c) if c in bucket_col_of else F.col(c)\n",
    "        for c in train_dataset.columns\n",
    "    )\n",
    ")\n",
    "train_df_bucketed.limit(10).toPandas()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "dd73063a-9031-4c2c-9cb6-8a668a750728",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>loanAmnt</th>\n",
       "      <th>term</th>\n",
       "      <th>interestRate</th>\n",
       "      <th>installment</th>\n",
       "      <th>grade</th>\n",
       "      <th>subGrade</th>\n",
       "      <th>employmentTitle</th>\n",
       "      <th>employmentLength</th>\n",
       "      <th>homeOwnership</th>\n",
       "      <th>annualIncome</th>\n",
       "      <th>verificationStatus</th>\n",
       "      <th>issueDate</th>\n",
       "      <th>purpose</th>\n",
       "      <th>postCode</th>\n",
       "      <th>regionCode</th>\n",
       "      <th>dti</th>\n",
       "      <th>delinquency_2years</th>\n",
       "      <th>ficoRangeLow</th>\n",
       "      <th>ficoRangeHigh</th>\n",
       "      <th>openAcc</th>\n",
       "      <th>pubRec</th>\n",
       "      <th>pubRecBankruptcies</th>\n",
       "      <th>revolBal</th>\n",
       "      <th>revolUtil</th>\n",
       "      <th>totalAcc</th>\n",
       "      <th>initialListStatus</th>\n",
       "      <th>applicationType</th>\n",
       "      <th>earliesCreditLine</th>\n",
       "      <th>title</th>\n",
       "      <th>policyCode</th>\n",
       "      <th>n0</th>\n",
       "      <th>n1</th>\n",
       "      <th>n2</th>\n",
       "      <th>n3</th>\n",
       "      <th>n4</th>\n",
       "      <th>n5</th>\n",
       "      <th>n6</th>\n",
       "      <th>n7</th>\n",
       "      <th>n8</th>\n",
       "      <th>n9</th>\n",
       "      <th>n10</th>\n",
       "      <th>n11</th>\n",
       "      <th>n12</th>\n",
       "      <th>n13</th>\n",
       "      <th>n14</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>800000</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>B</td>\n",
       "      <td>B3</td>\n",
       "      <td>4.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2014-07-01</td>\n",
       "      <td>0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>21</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Nov-1974</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>800001</td>\n",
       "      <td>7.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>C</td>\n",
       "      <td>C5</td>\n",
       "      <td>6.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2015-07-01</td>\n",
       "      <td>2</td>\n",
       "      <td>5.0</td>\n",
       "      <td>8</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Jul-2001</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>800002</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3</td>\n",
       "      <td>9.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>D</td>\n",
       "      <td>D4</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2 years</td>\n",
       "      <td>1</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2016-10-01</td>\n",
       "      <td>0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>20</td>\n",
       "      <td>9.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>Aug-2006</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>800003</td>\n",
       "      <td>6.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>C</td>\n",
       "      <td>C4</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4 years</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2014-11-01</td>\n",
       "      <td>4</td>\n",
       "      <td>5.0</td>\n",
       "      <td>11</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Jul-2002</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>800004</td>\n",
       "      <td>9.0</td>\n",
       "      <td>3</td>\n",
       "      <td>8.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>D</td>\n",
       "      <td>D1</td>\n",
       "      <td>9.0</td>\n",
       "      <td>&lt; 1 year</td>\n",
       "      <td>1</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2017-10-01</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>8</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Dec-2000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>800005</td>\n",
       "      <td>6.0</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>A</td>\n",
       "      <td>A1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2017-05-01</td>\n",
       "      <td>0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>8</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Jul-2000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>25.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>800006</td>\n",
       "      <td>8.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>C</td>\n",
       "      <td>C4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>1</td>\n",
       "      <td>9.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2016-11-01</td>\n",
       "      <td>0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>24</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Jan-1998</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>800007</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>A</td>\n",
       "      <td>A4</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6 years</td>\n",
       "      <td>0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2017-10-01</td>\n",
       "      <td>4</td>\n",
       "      <td>2.0</td>\n",
       "      <td>15</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Feb-2007</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>800008</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5</td>\n",
       "      <td>7.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>D</td>\n",
       "      <td>D2</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3 years</td>\n",
       "      <td>1</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2015-10-01</td>\n",
       "      <td>0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>May-2004</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>800009</td>\n",
       "      <td>6.0</td>\n",
       "      <td>3</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>A</td>\n",
       "      <td>A4</td>\n",
       "      <td>5.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2014-08-01</td>\n",
       "      <td>4</td>\n",
       "      <td>2.0</td>\n",
       "      <td>26</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Sep-1992</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       id  loanAmnt term  interestRate  installment grade subGrade  \\\n",
       "0  800000       5.0    3           3.0          6.0     B       B3   \n",
       "1  800001       7.0    5           6.0          6.0     C       C5   \n",
       "2  800002       5.0    3           9.0          5.0     D       D4   \n",
       "3  800003       6.0    5           6.0          5.0     C       C4   \n",
       "4  800004       9.0    3           8.0          9.0     D       D1   \n",
       "5  800005       6.0    3           0.0          6.0     A       A1   \n",
       "6  800006       8.0    5           6.0          7.0     C       C4   \n",
       "7  800007       0.0    3           0.0          0.0     A       A4   \n",
       "8  800008       5.0    5           7.0          3.0     D       D2   \n",
       "9  800009       6.0    3           1.0          6.0     A       A4   \n",
       "\n",
       "   employmentTitle employmentLength homeOwnership  annualIncome  \\\n",
       "0              4.0        10+ years             0           6.0   \n",
       "1              6.0        10+ years             0           3.0   \n",
       "2              5.0          2 years             1           4.0   \n",
       "3              2.0          4 years             0           1.0   \n",
       "4              9.0         < 1 year             1           6.0   \n",
       "5              2.0        10+ years             0           7.0   \n",
       "6              0.0        10+ years             1           9.0   \n",
       "7              4.0          6 years             0           3.0   \n",
       "8              2.0          3 years             1           4.0   \n",
       "9              5.0        10+ years             0           9.0   \n",
       "\n",
       "  verificationStatus   issueDate purpose  postCode regionCode  dti  \\\n",
       "0                  0  2014-07-01       0       4.0         21  2.0   \n",
       "1                  0  2015-07-01       2       5.0          8  6.0   \n",
       "2                  2  2016-10-01       0       8.0         20  9.0   \n",
       "3                  1  2014-11-01       4       5.0         11  3.0   \n",
       "4                  1  2017-10-01       0       2.0          8  7.0   \n",
       "5                  0  2017-05-01       0       8.0          8  3.0   \n",
       "6                  2  2016-11-01       0       6.0         24  7.0   \n",
       "7                  0  2017-10-01       4       2.0         15  3.0   \n",
       "8                  1  2015-10-01       0       3.0          0  7.0   \n",
       "9                  0  2014-08-01       4       2.0         26  3.0   \n",
       "\n",
       "   delinquency_2years  ficoRangeLow  ficoRangeHigh  openAcc  pubRec  \\\n",
       "0                 2.0           7.0            7.0      8.0     1.0   \n",
       "1                 2.0           2.0            2.0      0.0     1.0   \n",
       "2                 1.0           7.0            7.0      6.0     1.0   \n",
       "3                 1.0           4.0            4.0      4.0     2.0   \n",
       "4                 1.0           4.0            4.0      9.0     1.0   \n",
       "5                 1.0           9.0            9.0      8.0     1.0   \n",
       "6                 1.0           8.0            8.0      7.0     1.0   \n",
       "7                 1.0           9.0            9.0      3.0     1.0   \n",
       "8                 2.0           4.0            4.0      7.0     1.0   \n",
       "9                 1.0           7.0            7.0      4.0     1.0   \n",
       "\n",
       "   pubRecBankruptcies  revolBal  revolUtil  totalAcc initialListStatus  \\\n",
       "0                 1.0       4.0        2.0       6.0                 0   \n",
       "1                 1.0       4.0        5.0       1.0                 0   \n",
       "2                 1.0       0.0        0.0       9.0                 1   \n",
       "3                 2.0       4.0        5.0       3.0                 0   \n",
       "4                 1.0       9.0        2.0       4.0                 0   \n",
       "5                 1.0       2.0        0.0       8.0                 0   \n",
       "6                 1.0       8.0        3.0       5.0                 0   \n",
       "7                 1.0       2.0        1.0       2.0                 1   \n",
       "8                 1.0       0.0        1.0       6.0                 0   \n",
       "9                 1.0       6.0        8.0       6.0                 0   \n",
       "\n",
       "  applicationType earliesCreditLine  title  policyCode   n0   n1    n2    n3  \\\n",
       "0               0          Nov-1974    1.0         1.0  1.0  4.0   6.0   6.0   \n",
       "1               0          Jul-2001    3.0         1.0  2.0  1.0   3.0   3.0   \n",
       "2               0          Aug-2006    1.0         1.0  0.0  1.0   4.0   4.0   \n",
       "3               0          Jul-2002    3.0         1.0  0.0  2.0   2.0   2.0   \n",
       "4               0          Dec-2000    1.0         1.0  0.0  8.0  11.0  11.0   \n",
       "5               0          Jul-2000    1.0         1.0  0.0  3.0   5.0   5.0   \n",
       "6               0          Jan-1998    1.0         1.0  0.0  5.0   7.0   7.0   \n",
       "7               1          Feb-2007    3.0         1.0  0.0  3.0   3.0   3.0   \n",
       "8               0          May-2004    1.0         1.0  1.0  4.0   4.0   4.0   \n",
       "9               0          Sep-1992    3.0         1.0  2.0  2.0   4.0   4.0   \n",
       "\n",
       "    n4    n5    n6    n7    n8    n9   n10  n11  n12  n13  n14  \n",
       "0  6.0   8.0   4.0  15.0  19.0   6.0  17.0  0.0  0.0  1.0  3.0  \n",
       "1  1.0   1.0   3.0   3.0   9.0   3.0   5.0  0.0  0.0  2.0  2.0  \n",
       "2  1.0   1.0  36.0   5.0   6.0   4.0  12.0  0.0  0.0  0.0  7.0  \n",
       "3  4.0   7.0   2.0   8.0  14.0   2.0  10.0  0.0  0.0  0.0  3.0  \n",
       "4  9.0  11.0   3.0  16.0  18.0  11.0  19.0  0.0  0.0  0.0  1.0  \n",
       "5  8.0  14.0   6.0  13.0  25.0   5.0  17.0  0.0  0.0  0.0  3.0  \n",
       "6  6.0   6.0   6.0  10.0  17.0   7.0  14.0  0.0  0.0  0.0  1.0  \n",
       "7  4.0   6.0   8.0   4.0   8.0   3.0   8.0  0.0  0.0  0.0  1.0  \n",
       "8  4.0   5.0  21.0   5.0   8.0   4.0  14.0  0.0  0.0  0.0  4.0  \n",
       "9  2.0   7.0  10.0   4.0  13.0   4.0  10.0  0.0  0.0  0.0  3.0  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Run the test split through `discretizer`, then drop the columns listed in `continuous_colunms`.\n",
    "test_df_bucketed = discretizer.transform(test_dataset).drop(*continuous_colunms)\n",
    "\n",
    "# Columns named in `temp_cols` get `output_suffix` removed from their name; all others pass through unchanged.\n",
    "test_df_bucketed = test_df_bucketed.select(*[\n",
    "    F.col(name).alias(name.replace(output_suffix, '')) if name in temp_cols else F.col(name)\n",
    "    for name in test_df_bucketed.columns\n",
    "])\n",
    "\n",
    "# Select the columns of `test_dataset`, in that frame's column order.\n",
    "test_df_bucketed = test_df_bucketed.select(*test_dataset.columns)\n",
    "\n",
    "# Preview the first 10 rows as a pandas DataFrame.\n",
    "test_df_bucketed.limit(10).toPandas()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "825808ae-7a71-4b8f-8f74-4c75d9337f06",
   "metadata": {},
   "source": [
    "#### WoE encoding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "ac261737-3a81-4b83-8391-5bb46c78e3fb",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                \r"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>loanAmnt</th>\n",
       "      <th>term</th>\n",
       "      <th>interestRate</th>\n",
       "      <th>installment</th>\n",
       "      <th>grade</th>\n",
       "      <th>subGrade</th>\n",
       "      <th>employmentTitle</th>\n",
       "      <th>employmentLength</th>\n",
       "      <th>homeOwnership</th>\n",
       "      <th>annualIncome</th>\n",
       "      <th>verificationStatus</th>\n",
       "      <th>issueDate</th>\n",
       "      <th>isDefault</th>\n",
       "      <th>purpose</th>\n",
       "      <th>postCode</th>\n",
       "      <th>regionCode</th>\n",
       "      <th>dti</th>\n",
       "      <th>delinquency_2years</th>\n",
       "      <th>ficoRangeLow</th>\n",
       "      <th>ficoRangeHigh</th>\n",
       "      <th>openAcc</th>\n",
       "      <th>pubRec</th>\n",
       "      <th>pubRecBankruptcies</th>\n",
       "      <th>revolBal</th>\n",
       "      <th>revolUtil</th>\n",
       "      <th>totalAcc</th>\n",
       "      <th>initialListStatus</th>\n",
       "      <th>applicationType</th>\n",
       "      <th>earliesCreditLine</th>\n",
       "      <th>title</th>\n",
       "      <th>policyCode</th>\n",
       "      <th>n0</th>\n",
       "      <th>n1</th>\n",
       "      <th>n2</th>\n",
       "      <th>n3</th>\n",
       "      <th>n4</th>\n",
       "      <th>n5</th>\n",
       "      <th>n6</th>\n",
       "      <th>n7</th>\n",
       "      <th>n8</th>\n",
       "      <th>n9</th>\n",
       "      <th>n10</th>\n",
       "      <th>n11</th>\n",
       "      <th>n12</th>\n",
       "      <th>n13</th>\n",
       "      <th>n14</th>\n",
       "      <th>loanAmnt_woe</th>\n",
       "      <th>term_woe</th>\n",
       "      <th>interestRate_woe</th>\n",
       "      <th>installment_woe</th>\n",
       "      <th>grade_woe</th>\n",
       "      <th>subGrade_woe</th>\n",
       "      <th>employmentTitle_woe</th>\n",
       "      <th>employmentLength_woe</th>\n",
       "      <th>homeOwnership_woe</th>\n",
       "      <th>annualIncome_woe</th>\n",
       "      <th>verificationStatus_woe</th>\n",
       "      <th>issueDate_woe</th>\n",
       "      <th>purpose_woe</th>\n",
       "      <th>postCode_woe</th>\n",
       "      <th>regionCode_woe</th>\n",
       "      <th>dti_woe</th>\n",
       "      <th>delinquency_2years_woe</th>\n",
       "      <th>ficoRangeLow_woe</th>\n",
       "      <th>ficoRangeHigh_woe</th>\n",
       "      <th>openAcc_woe</th>\n",
       "      <th>pubRec_woe</th>\n",
       "      <th>pubRecBankruptcies_woe</th>\n",
       "      <th>revolBal_woe</th>\n",
       "      <th>revolUtil_woe</th>\n",
       "      <th>totalAcc_woe</th>\n",
       "      <th>initialListStatus_woe</th>\n",
       "      <th>applicationType_woe</th>\n",
       "      <th>earliesCreditLine_woe</th>\n",
       "      <th>title_woe</th>\n",
       "      <th>policyCode_woe</th>\n",
       "      <th>n0_woe</th>\n",
       "      <th>n1_woe</th>\n",
       "      <th>n2_woe</th>\n",
       "      <th>n3_woe</th>\n",
       "      <th>n4_woe</th>\n",
       "      <th>n5_woe</th>\n",
       "      <th>n6_woe</th>\n",
       "      <th>n7_woe</th>\n",
       "      <th>n8_woe</th>\n",
       "      <th>n9_woe</th>\n",
       "      <th>n10_woe</th>\n",
       "      <th>n11_woe</th>\n",
       "      <th>n12_woe</th>\n",
       "      <th>n13_woe</th>\n",
       "      <th>n14_woe</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>5</td>\n",
       "      <td>9.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>E</td>\n",
       "      <td>E2</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2 years</td>\n",
       "      <td>2</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2014-07-01</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3.0</td>\n",
       "      <td>32</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Aug-2001</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.212384</td>\n",
       "      <td>0.652008</td>\n",
       "      <td>0.980909</td>\n",
       "      <td>0.179946</td>\n",
       "      <td>0.917968</td>\n",
       "      <td>0.886628</td>\n",
       "      <td>-0.035858</td>\n",
       "      <td>0.003257</td>\n",
       "      <td>0.051106</td>\n",
       "      <td>-0.225376</td>\n",
       "      <td>0.224884</td>\n",
       "      <td>-0.070409</td>\n",
       "      <td>0.519035</td>\n",
       "      <td>-0.026490</td>\n",
       "      <td>0.075052</td>\n",
       "      <td>-0.097408</td>\n",
       "      <td>-0.023223</td>\n",
       "      <td>-0.438466</td>\n",
       "      <td>-0.438466</td>\n",
       "      <td>-0.074541</td>\n",
       "      <td>-0.037466</td>\n",
       "      <td>-0.025199</td>\n",
       "      <td>-0.029686</td>\n",
       "      <td>0.014164</td>\n",
       "      <td>-0.027039</td>\n",
       "      <td>0.015550</td>\n",
       "      <td>-0.006394</td>\n",
       "      <td>0.021583</td>\n",
       "      <td>0.120767</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.013497</td>\n",
       "      <td>-0.091526</td>\n",
       "      <td>-0.225819</td>\n",
       "      <td>-0.225819</td>\n",
       "      <td>-0.009372</td>\n",
       "      <td>-0.012631</td>\n",
       "      <td>0.024390</td>\n",
       "      <td>-0.101023</td>\n",
       "      <td>-0.005999</td>\n",
       "      <td>-0.220500</td>\n",
       "      <td>-0.055832</td>\n",
       "      <td>-0.001482</td>\n",
       "      <td>0.016079</td>\n",
       "      <td>0.007389</td>\n",
       "      <td>0.002365</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>7.0</td>\n",
       "      <td>5</td>\n",
       "      <td>8.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>D</td>\n",
       "      <td>D2</td>\n",
       "      <td>8.0</td>\n",
       "      <td>5 years</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2012-08-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>18</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>May-2002</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Empty</td>\n",
       "      <td>Empty</td>\n",
       "      <td>Empty</td>\n",
       "      <td>Empty</td>\n",
       "      <td>10.0</td>\n",
       "      <td>Empty</td>\n",
       "      <td>Empty</td>\n",
       "      <td>Empty</td>\n",
       "      <td>Empty</td>\n",
       "      <td>Empty</td>\n",
       "      <td>13.0</td>\n",
       "      <td>Empty</td>\n",
       "      <td>Empty</td>\n",
       "      <td>Empty</td>\n",
       "      <td>Empty</td>\n",
       "      <td>0.150643</td>\n",
       "      <td>0.652008</td>\n",
       "      <td>0.613972</td>\n",
       "      <td>0.066927</td>\n",
       "      <td>0.560321</td>\n",
       "      <td>0.530459</td>\n",
       "      <td>-0.239357</td>\n",
       "      <td>-0.023787</td>\n",
       "      <td>-0.185441</td>\n",
       "      <td>0.126109</td>\n",
       "      <td>0.224884</td>\n",
       "      <td>-0.238212</td>\n",
       "      <td>0.072657</td>\n",
       "      <td>-0.026490</td>\n",
       "      <td>-0.274387</td>\n",
       "      <td>0.292001</td>\n",
       "      <td>-0.023223</td>\n",
       "      <td>-0.040004</td>\n",
       "      <td>-0.040004</td>\n",
       "      <td>0.028316</td>\n",
       "      <td>-0.037466</td>\n",
       "      <td>-0.025199</td>\n",
       "      <td>0.047673</td>\n",
       "      <td>-0.031812</td>\n",
       "      <td>0.015964</td>\n",
       "      <td>-0.021991</td>\n",
       "      <td>-0.006394</td>\n",
       "      <td>0.110939</td>\n",
       "      <td>-0.335386</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.349841</td>\n",
       "      <td>-0.349841</td>\n",
       "      <td>-0.349841</td>\n",
       "      <td>-0.349841</td>\n",
       "      <td>0.088785</td>\n",
       "      <td>-0.349841</td>\n",
       "      <td>-0.349841</td>\n",
       "      <td>-0.349841</td>\n",
       "      <td>-0.349871</td>\n",
       "      <td>-0.349841</td>\n",
       "      <td>0.041934</td>\n",
       "      <td>0.015512</td>\n",
       "      <td>-0.349841</td>\n",
       "      <td>-0.349841</td>\n",
       "      <td>-0.349841</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5</td>\n",
       "      <td>8.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>D</td>\n",
       "      <td>D3</td>\n",
       "      <td>6.0</td>\n",
       "      <td>8 years</td>\n",
       "      <td>0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2015-10-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>14</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>May-2006</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.040077</td>\n",
       "      <td>0.652008</td>\n",
       "      <td>0.613972</td>\n",
       "      <td>-0.023396</td>\n",
       "      <td>0.560321</td>\n",
       "      <td>0.561095</td>\n",
       "      <td>0.004855</td>\n",
       "      <td>-0.014380</td>\n",
       "      <td>-0.185441</td>\n",
       "      <td>-0.063052</td>\n",
       "      <td>0.224884</td>\n",
       "      <td>-0.051211</td>\n",
       "      <td>0.072657</td>\n",
       "      <td>0.004208</td>\n",
       "      <td>-0.014202</td>\n",
       "      <td>0.163994</td>\n",
       "      <td>-0.023223</td>\n",
       "      <td>0.230524</td>\n",
       "      <td>0.230524</td>\n",
       "      <td>0.026030</td>\n",
       "      <td>-0.037466</td>\n",
       "      <td>-0.025199</td>\n",
       "      <td>-0.005503</td>\n",
       "      <td>0.014164</td>\n",
       "      <td>-0.027039</td>\n",
       "      <td>0.015550</td>\n",
       "      <td>-0.006394</td>\n",
       "      <td>0.126182</td>\n",
       "      <td>0.120767</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.013497</td>\n",
       "      <td>0.005277</td>\n",
       "      <td>-0.154578</td>\n",
       "      <td>-0.154578</td>\n",
       "      <td>0.099030</td>\n",
       "      <td>0.380758</td>\n",
       "      <td>0.066614</td>\n",
       "      <td>-0.101023</td>\n",
       "      <td>0.051932</td>\n",
       "      <td>-0.148058</td>\n",
       "      <td>0.042338</td>\n",
       "      <td>-0.001482</td>\n",
       "      <td>0.016079</td>\n",
       "      <td>0.007389</td>\n",
       "      <td>0.261988</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>A</td>\n",
       "      <td>A4</td>\n",
       "      <td>6.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>1</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2015-08-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4</td>\n",
       "      <td>3.0</td>\n",
       "      <td>11</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>May-1999</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.021457</td>\n",
       "      <td>-0.268618</td>\n",
       "      <td>-1.585257</td>\n",
       "      <td>0.111268</td>\n",
       "      <td>-1.355566</td>\n",
       "      <td>-1.240844</td>\n",
       "      <td>0.004855</td>\n",
       "      <td>-0.080023</td>\n",
       "      <td>0.192890</td>\n",
       "      <td>-0.225376</td>\n",
       "      <td>0.060872</td>\n",
       "      <td>-0.011346</td>\n",
       "      <td>-0.201407</td>\n",
       "      <td>-0.026490</td>\n",
       "      <td>0.074603</td>\n",
       "      <td>-0.097408</td>\n",
       "      <td>-0.023223</td>\n",
       "      <td>0.135115</td>\n",
       "      <td>0.135115</td>\n",
       "      <td>-0.021826</td>\n",
       "      <td>-0.037466</td>\n",
       "      <td>-0.025199</td>\n",
       "      <td>0.031703</td>\n",
       "      <td>0.069930</td>\n",
       "      <td>-0.027039</td>\n",
       "      <td>-0.021991</td>\n",
       "      <td>-0.006394</td>\n",
       "      <td>-0.003766</td>\n",
       "      <td>-0.149019</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.003692</td>\n",
       "      <td>0.029135</td>\n",
       "      <td>0.044226</td>\n",
       "      <td>0.044226</td>\n",
       "      <td>-0.009372</td>\n",
       "      <td>-0.075491</td>\n",
       "      <td>0.016291</td>\n",
       "      <td>-0.013259</td>\n",
       "      <td>0.007773</td>\n",
       "      <td>0.051284</td>\n",
       "      <td>-0.002343</td>\n",
       "      <td>-0.001482</td>\n",
       "      <td>0.016079</td>\n",
       "      <td>0.007389</td>\n",
       "      <td>-0.148513</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>C</td>\n",
       "      <td>C2</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Empty</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2016-03-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10</td>\n",
       "      <td>6.0</td>\n",
       "      <td>21</td>\n",
       "      <td>9.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Aug-1977</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>-0.311320</td>\n",
       "      <td>-0.268618</td>\n",
       "      <td>0.028718</td>\n",
       "      <td>-0.410185</td>\n",
       "      <td>0.152694</td>\n",
       "      <td>0.045582</td>\n",
       "      <td>0.196091</td>\n",
       "      <td>0.388563</td>\n",
       "      <td>0.192890</td>\n",
       "      <td>0.233323</td>\n",
       "      <td>0.224884</td>\n",
       "      <td>0.108472</td>\n",
       "      <td>0.179074</td>\n",
       "      <td>0.004208</td>\n",
       "      <td>0.096883</td>\n",
       "      <td>0.492354</td>\n",
       "      <td>-0.023223</td>\n",
       "      <td>0.039023</td>\n",
       "      <td>0.039023</td>\n",
       "      <td>0.018438</td>\n",
       "      <td>-0.037466</td>\n",
       "      <td>-0.025199</td>\n",
       "      <td>-0.007056</td>\n",
       "      <td>-0.101815</td>\n",
       "      <td>-0.027039</td>\n",
       "      <td>0.015550</td>\n",
       "      <td>-0.006394</td>\n",
       "      <td>0.176897</td>\n",
       "      <td>-0.045973</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.132020</td>\n",
       "      <td>-0.091526</td>\n",
       "      <td>0.100124</td>\n",
       "      <td>0.100124</td>\n",
       "      <td>-0.028484</td>\n",
       "      <td>0.060540</td>\n",
       "      <td>-0.024741</td>\n",
       "      <td>0.052799</td>\n",
       "      <td>0.008057</td>\n",
       "      <td>0.096393</td>\n",
       "      <td>0.032834</td>\n",
       "      <td>-0.001482</td>\n",
       "      <td>0.016079</td>\n",
       "      <td>0.007389</td>\n",
       "      <td>0.261988</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>A</td>\n",
       "      <td>A5</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7 years</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2017-04-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9</td>\n",
       "      <td>8.0</td>\n",
       "      <td>21</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>Jul-1998</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.021457</td>\n",
       "      <td>-0.268618</td>\n",
       "      <td>-0.975234</td>\n",
       "      <td>0.111268</td>\n",
       "      <td>-1.355566</td>\n",
       "      <td>-0.981811</td>\n",
       "      <td>0.004855</td>\n",
       "      <td>-0.028205</td>\n",
       "      <td>-0.185441</td>\n",
       "      <td>0.182694</td>\n",
       "      <td>0.224884</td>\n",
       "      <td>0.222428</td>\n",
       "      <td>0.113962</td>\n",
       "      <td>0.033406</td>\n",
       "      <td>0.096883</td>\n",
       "      <td>-0.097408</td>\n",
       "      <td>-0.023223</td>\n",
       "      <td>-0.438466</td>\n",
       "      <td>-0.438466</td>\n",
       "      <td>0.124319</td>\n",
       "      <td>-0.037466</td>\n",
       "      <td>-0.025199</td>\n",
       "      <td>-0.005503</td>\n",
       "      <td>-0.101815</td>\n",
       "      <td>-0.022366</td>\n",
       "      <td>-0.021991</td>\n",
       "      <td>-0.006394</td>\n",
       "      <td>-0.062422</td>\n",
       "      <td>-0.045973</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.278610</td>\n",
       "      <td>-0.076569</td>\n",
       "      <td>-0.225819</td>\n",
       "      <td>-0.225819</td>\n",
       "      <td>0.033582</td>\n",
       "      <td>0.222077</td>\n",
       "      <td>0.276938</td>\n",
       "      <td>-0.091279</td>\n",
       "      <td>0.101450</td>\n",
       "      <td>-0.220500</td>\n",
       "      <td>0.058839</td>\n",
       "      <td>-0.001482</td>\n",
       "      <td>0.016079</td>\n",
       "      <td>0.007389</td>\n",
       "      <td>-0.290570</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>A</td>\n",
       "      <td>A4</td>\n",
       "      <td>8.0</td>\n",
       "      <td>9 years</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2014-10-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>14</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Oct-2006</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-0.311320</td>\n",
       "      <td>-0.268618</td>\n",
       "      <td>-0.975234</td>\n",
       "      <td>-0.410185</td>\n",
       "      <td>-1.355566</td>\n",
       "      <td>-1.240844</td>\n",
       "      <td>-0.239357</td>\n",
       "      <td>-0.006973</td>\n",
       "      <td>-0.185441</td>\n",
       "      <td>0.182694</td>\n",
       "      <td>-0.367220</td>\n",
       "      <td>0.001535</td>\n",
       "      <td>0.072657</td>\n",
       "      <td>0.033406</td>\n",
       "      <td>-0.014202</td>\n",
       "      <td>-0.097408</td>\n",
       "      <td>-0.023223</td>\n",
       "      <td>-0.846917</td>\n",
       "      <td>-0.846917</td>\n",
       "      <td>0.018438</td>\n",
       "      <td>-0.037466</td>\n",
       "      <td>-0.025199</td>\n",
       "      <td>-0.005503</td>\n",
       "      <td>-0.359315</td>\n",
       "      <td>-0.026171</td>\n",
       "      <td>0.015550</td>\n",
       "      <td>-0.006394</td>\n",
       "      <td>0.121154</td>\n",
       "      <td>0.120767</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.013497</td>\n",
       "      <td>-0.076569</td>\n",
       "      <td>-0.154578</td>\n",
       "      <td>-0.154578</td>\n",
       "      <td>0.053024</td>\n",
       "      <td>-0.034298</td>\n",
       "      <td>0.006573</td>\n",
       "      <td>0.052799</td>\n",
       "      <td>-0.010330</td>\n",
       "      <td>-0.148058</td>\n",
       "      <td>0.032834</td>\n",
       "      <td>-0.001482</td>\n",
       "      <td>0.016079</td>\n",
       "      <td>0.007389</td>\n",
       "      <td>0.146186</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3</td>\n",
       "      <td>6.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>C</td>\n",
       "      <td>C3</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1 year</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2014-01-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4</td>\n",
       "      <td>9.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>Dec-1994</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.021457</td>\n",
       "      <td>-0.268618</td>\n",
       "      <td>0.194969</td>\n",
       "      <td>0.107892</td>\n",
       "      <td>0.152694</td>\n",
       "      <td>0.150148</td>\n",
       "      <td>-0.239357</td>\n",
       "      <td>0.035152</td>\n",
       "      <td>0.192890</td>\n",
       "      <td>0.233323</td>\n",
       "      <td>0.224884</td>\n",
       "      <td>-0.271036</td>\n",
       "      <td>0.072657</td>\n",
       "      <td>0.013813</td>\n",
       "      <td>-0.015749</td>\n",
       "      <td>0.492354</td>\n",
       "      <td>-0.023223</td>\n",
       "      <td>0.331776</td>\n",
       "      <td>0.331776</td>\n",
       "      <td>-0.049954</td>\n",
       "      <td>0.173080</td>\n",
       "      <td>0.167573</td>\n",
       "      <td>0.047673</td>\n",
       "      <td>0.096644</td>\n",
       "      <td>-0.046589</td>\n",
       "      <td>-0.021991</td>\n",
       "      <td>-0.006394</td>\n",
       "      <td>-0.126231</td>\n",
       "      <td>0.120767</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.013497</td>\n",
       "      <td>0.029135</td>\n",
       "      <td>-0.081708</td>\n",
       "      <td>-0.081708</td>\n",
       "      <td>-0.009372</td>\n",
       "      <td>-0.075491</td>\n",
       "      <td>-0.004990</td>\n",
       "      <td>-0.038488</td>\n",
       "      <td>0.007773</td>\n",
       "      <td>-0.075945</td>\n",
       "      <td>-0.035732</td>\n",
       "      <td>-0.001482</td>\n",
       "      <td>0.016079</td>\n",
       "      <td>0.007389</td>\n",
       "      <td>0.002365</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>C</td>\n",
       "      <td>C2</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5 years</td>\n",
       "      <td>2</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2016-05-01</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>13</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>Apr-1994</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>Empty</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.040077</td>\n",
       "      <td>-0.268618</td>\n",
       "      <td>0.028718</td>\n",
       "      <td>0.107892</td>\n",
       "      <td>0.152694</td>\n",
       "      <td>0.045582</td>\n",
       "      <td>0.196091</td>\n",
       "      <td>-0.023787</td>\n",
       "      <td>0.051106</td>\n",
       "      <td>0.059751</td>\n",
       "      <td>0.060872</td>\n",
       "      <td>0.339576</td>\n",
       "      <td>0.072657</td>\n",
       "      <td>0.024108</td>\n",
       "      <td>0.121614</td>\n",
       "      <td>-0.010114</td>\n",
       "      <td>-0.023223</td>\n",
       "      <td>0.039023</td>\n",
       "      <td>0.039023</td>\n",
       "      <td>0.054652</td>\n",
       "      <td>-0.037466</td>\n",
       "      <td>-0.025199</td>\n",
       "      <td>-0.029686</td>\n",
       "      <td>0.014164</td>\n",
       "      <td>-0.007714</td>\n",
       "      <td>-0.021991</td>\n",
       "      <td>-0.006394</td>\n",
       "      <td>-0.177769</td>\n",
       "      <td>0.120767</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.013497</td>\n",
       "      <td>0.175882</td>\n",
       "      <td>0.379094</td>\n",
       "      <td>0.379094</td>\n",
       "      <td>0.053024</td>\n",
       "      <td>-0.001286</td>\n",
       "      <td>0.020473</td>\n",
       "      <td>0.107856</td>\n",
       "      <td>-0.008783</td>\n",
       "      <td>0.307821</td>\n",
       "      <td>0.056958</td>\n",
       "      <td>0.015512</td>\n",
       "      <td>0.016079</td>\n",
       "      <td>0.007389</td>\n",
       "      <td>0.403527</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>9</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>B</td>\n",
       "      <td>B4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Empty</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2015-11-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>11</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Jan-1993</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>39.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>-0.222567</td>\n",
       "      <td>-0.268618</td>\n",
       "      <td>-0.321798</td>\n",
       "      <td>-0.248102</td>\n",
       "      <td>-0.485412</td>\n",
       "      <td>-0.355973</td>\n",
       "      <td>0.196091</td>\n",
       "      <td>0.388563</td>\n",
       "      <td>0.192890</td>\n",
       "      <td>0.233323</td>\n",
       "      <td>0.224884</td>\n",
       "      <td>-0.008104</td>\n",
       "      <td>0.072657</td>\n",
       "      <td>0.008826</td>\n",
       "      <td>0.074603</td>\n",
       "      <td>0.163994</td>\n",
       "      <td>-0.023223</td>\n",
       "      <td>-0.438466</td>\n",
       "      <td>-0.438466</td>\n",
       "      <td>-0.074541</td>\n",
       "      <td>-0.037466</td>\n",
       "      <td>-0.025199</td>\n",
       "      <td>-0.007056</td>\n",
       "      <td>-0.101815</td>\n",
       "      <td>-0.022366</td>\n",
       "      <td>0.015550</td>\n",
       "      <td>-0.006394</td>\n",
       "      <td>-0.129943</td>\n",
       "      <td>0.120767</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.013497</td>\n",
       "      <td>0.029135</td>\n",
       "      <td>-0.010603</td>\n",
       "      <td>-0.010603</td>\n",
       "      <td>-0.009372</td>\n",
       "      <td>-0.045400</td>\n",
       "      <td>0.023241</td>\n",
       "      <td>-0.031142</td>\n",
       "      <td>0.066247</td>\n",
       "      <td>-0.007837</td>\n",
       "      <td>-0.055832</td>\n",
       "      <td>-0.001482</td>\n",
       "      <td>0.016079</td>\n",
       "      <td>0.007389</td>\n",
       "      <td>0.544376</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  id  loanAmnt term  interestRate  installment grade subGrade  \\\n",
       "0  0       9.0    5           9.0          9.0     E       E2   \n",
       "1  1       7.0    5           8.0          6.0     D       D2   \n",
       "2  2       5.0    5           8.0          3.0     D       D3   \n",
       "3  3       4.0    3           0.0          4.0     A       A4   \n",
       "4  4       0.0    3           5.0          0.0     C       C2   \n",
       "5  5       4.0    3           1.0          4.0     A       A5   \n",
       "6  6       0.0    3           1.0          0.0     A       A4   \n",
       "7  7       4.0    3           6.0          5.0     C       C3   \n",
       "8  8       5.0    3           5.0          5.0     C       C2   \n",
       "9  9       2.0    3           3.0          1.0     B       B4   \n",
       "\n",
       "   employmentTitle employmentLength homeOwnership  annualIncome  \\\n",
       "0              2.0          2 years             2           8.0   \n",
       "1              8.0          5 years             0           2.0   \n",
       "2              6.0          8 years             0           6.0   \n",
       "3              6.0        10+ years             1           8.0   \n",
       "4              1.0            Empty             1           0.0   \n",
       "5              6.0          7 years             0           1.0   \n",
       "6              8.0          9 years             0           1.0   \n",
       "7              8.0           1 year             1           0.0   \n",
       "8              1.0          5 years             2           4.0   \n",
       "9              1.0            Empty             1           0.0   \n",
       "\n",
       "  verificationStatus   issueDate  isDefault purpose  postCode regionCode  dti  \\\n",
       "0                  2  2014-07-01        1.0       1       3.0         32  4.0   \n",
       "1                  2  2012-08-01        0.0       0       3.0         18  8.0   \n",
       "2                  2  2015-10-01        0.0       0       6.0         14  7.0   \n",
       "3                  1  2015-08-01        0.0       4       3.0         11  4.0   \n",
       "4                  2  2016-03-01        0.0      10       6.0         21  9.0   \n",
       "5                  2  2017-04-01        0.0       9       8.0         21  4.0   \n",
       "6                  0  2014-10-01        0.0       0       8.0         14  4.0   \n",
       "7                  2  2014-01-01        0.0       0       2.0          4  9.0   \n",
       "8                  1  2016-05-01        1.0       0       9.0         13  5.0   \n",
       "9                  2  2015-11-01        0.0       0       1.0         11  7.0   \n",
       "\n",
       "   delinquency_2years  ficoRangeLow  ficoRangeHigh  openAcc  pubRec  \\\n",
       "0                 1.0           8.0            8.0      2.0     1.0   \n",
       "1                 1.0           6.0            6.0      7.0     1.0   \n",
       "2                 1.0           3.0            3.0      5.0     1.0   \n",
       "3                 1.0           4.0            4.0      4.0     1.0   \n",
       "4                 1.0           5.0            5.0      6.0     1.0   \n",
       "5                 1.0           8.0            8.0      9.0     1.0   \n",
       "6                 1.0           9.0            9.0      6.0     1.0   \n",
       "7                 1.0           1.0            1.0      3.0     2.0   \n",
       "8                 1.0           5.0            5.0      8.0     1.0   \n",
       "9                 1.0           8.0            8.0      2.0     1.0   \n",
       "\n",
       "   pubRecBankruptcies  revolBal  revolUtil  totalAcc initialListStatus  \\\n",
       "0                 1.0       8.0        4.0       6.0                 0   \n",
       "1                 1.0       6.0        3.0       3.0                 1   \n",
       "2                 1.0       1.0        4.0       6.0                 0   \n",
       "3                 1.0       4.0        5.0       6.0                 1   \n",
       "4                 1.0       0.0        2.0       6.0                 0   \n",
       "5                 1.0       1.0        2.0       9.0                 1   \n",
       "6                 1.0       1.0        0.0       5.0                 0   \n",
       "7                 2.0       6.0        6.0       7.0                 1   \n",
       "8                 1.0       8.0        4.0       4.0                 1   \n",
       "9                 1.0       0.0        2.0       9.0                 0   \n",
       "\n",
       "  applicationType earliesCreditLine  title  policyCode     n0     n1     n2  \\\n",
       "0               0          Aug-2001    1.0         1.0    0.0    2.0    2.0   \n",
       "1               0          May-2002    5.0         1.0  Empty  Empty  Empty   \n",
       "2               0          May-2006    1.0         1.0    0.0    0.0    3.0   \n",
       "3               0          May-1999    3.0         1.0    6.0    4.0    6.0   \n",
       "4               0          Aug-1977    4.0         1.0    1.0    2.0    7.0   \n",
       "5               0          Jul-1998    4.0         1.0   12.0    1.0    2.0   \n",
       "6               0          Oct-2006    1.0         1.0    0.0    1.0    3.0   \n",
       "7               0          Dec-1994    1.0         1.0    0.0    4.0    4.0   \n",
       "8               0          Apr-1994    1.0         1.0    0.0    7.0   13.0   \n",
       "9               0          Jan-1993    1.0         1.0    0.0    4.0    5.0   \n",
       "\n",
       "      n3    n4     n5     n6     n7     n8     n9   n10    n11    n12    n13  \\\n",
       "0    2.0   4.0    9.0    8.0    4.0   12.0    2.0   7.0    0.0    0.0    0.0   \n",
       "1  Empty  10.0  Empty  Empty  Empty  Empty  Empty  13.0  Empty  Empty  Empty   \n",
       "2    3.0   0.0    0.0   21.0    4.0    5.0    3.0  11.0    0.0    0.0    0.0   \n",
       "3    6.0   4.0   16.0    4.0    7.0   21.0    6.0   9.0    0.0    0.0    0.0   \n",
       "4    7.0   2.0    4.0    9.0   10.0   15.0    7.0  12.0    0.0    0.0    0.0   \n",
       "5    2.0   1.0    1.0   48.0    2.0    3.0    2.0  19.0    0.0    0.0    0.0   \n",
       "6    3.0   7.0   11.0    3.0   10.0   18.0    3.0  12.0    0.0    0.0    0.0   \n",
       "7    4.0   4.0   16.0   10.0    5.0   21.0    4.0   8.0    0.0    0.0    0.0   \n",
       "8   13.0   7.0    7.0    2.0   13.0   17.0   11.0  15.0  Empty    0.0    0.0   \n",
       "9    5.0   4.0   21.0   24.0    6.0   39.0    5.0   7.0    0.0    0.0    0.0   \n",
       "\n",
       "     n14  loanAmnt_woe  term_woe  interestRate_woe  installment_woe  \\\n",
       "0    2.0      0.212384  0.652008          0.980909         0.179946   \n",
       "1  Empty      0.150643  0.652008          0.613972         0.066927   \n",
       "2    4.0      0.040077  0.652008          0.613972        -0.023396   \n",
       "3    1.0      0.021457 -0.268618         -1.585257         0.111268   \n",
       "4    4.0     -0.311320 -0.268618          0.028718        -0.410185   \n",
       "5    0.0      0.021457 -0.268618         -0.975234         0.111268   \n",
       "6    3.0     -0.311320 -0.268618         -0.975234        -0.410185   \n",
       "7    2.0      0.021457 -0.268618          0.194969         0.107892   \n",
       "8    6.0      0.040077 -0.268618          0.028718         0.107892   \n",
       "9    8.0     -0.222567 -0.268618         -0.321798        -0.248102   \n",
       "\n",
       "   grade_woe  subGrade_woe  employmentTitle_woe  employmentLength_woe  \\\n",
       "0   0.917968      0.886628            -0.035858              0.003257   \n",
       "1   0.560321      0.530459            -0.239357             -0.023787   \n",
       "2   0.560321      0.561095             0.004855             -0.014380   \n",
       "3  -1.355566     -1.240844             0.004855             -0.080023   \n",
       "4   0.152694      0.045582             0.196091              0.388563   \n",
       "5  -1.355566     -0.981811             0.004855             -0.028205   \n",
       "6  -1.355566     -1.240844            -0.239357             -0.006973   \n",
       "7   0.152694      0.150148            -0.239357              0.035152   \n",
       "8   0.152694      0.045582             0.196091             -0.023787   \n",
       "9  -0.485412     -0.355973             0.196091              0.388563   \n",
       "\n",
       "   homeOwnership_woe  annualIncome_woe  verificationStatus_woe  issueDate_woe  \\\n",
       "0           0.051106         -0.225376                0.224884      -0.070409   \n",
       "1          -0.185441          0.126109                0.224884      -0.238212   \n",
       "2          -0.185441         -0.063052                0.224884      -0.051211   \n",
       "3           0.192890         -0.225376                0.060872      -0.011346   \n",
       "4           0.192890          0.233323                0.224884       0.108472   \n",
       "5          -0.185441          0.182694                0.224884       0.222428   \n",
       "6          -0.185441          0.182694               -0.367220       0.001535   \n",
       "7           0.192890          0.233323                0.224884      -0.271036   \n",
       "8           0.051106          0.059751                0.060872       0.339576   \n",
       "9           0.192890          0.233323                0.224884      -0.008104   \n",
       "\n",
       "   purpose_woe  postCode_woe  regionCode_woe   dti_woe  \\\n",
       "0     0.519035     -0.026490        0.075052 -0.097408   \n",
       "1     0.072657     -0.026490       -0.274387  0.292001   \n",
       "2     0.072657      0.004208       -0.014202  0.163994   \n",
       "3    -0.201407     -0.026490        0.074603 -0.097408   \n",
       "4     0.179074      0.004208        0.096883  0.492354   \n",
       "5     0.113962      0.033406        0.096883 -0.097408   \n",
       "6     0.072657      0.033406       -0.014202 -0.097408   \n",
       "7     0.072657      0.013813       -0.015749  0.492354   \n",
       "8     0.072657      0.024108        0.121614 -0.010114   \n",
       "9     0.072657      0.008826        0.074603  0.163994   \n",
       "\n",
       "   delinquency_2years_woe  ficoRangeLow_woe  ficoRangeHigh_woe  openAcc_woe  \\\n",
       "0               -0.023223         -0.438466          -0.438466    -0.074541   \n",
       "1               -0.023223         -0.040004          -0.040004     0.028316   \n",
       "2               -0.023223          0.230524           0.230524     0.026030   \n",
       "3               -0.023223          0.135115           0.135115    -0.021826   \n",
       "4               -0.023223          0.039023           0.039023     0.018438   \n",
       "5               -0.023223         -0.438466          -0.438466     0.124319   \n",
       "6               -0.023223         -0.846917          -0.846917     0.018438   \n",
       "7               -0.023223          0.331776           0.331776    -0.049954   \n",
       "8               -0.023223          0.039023           0.039023     0.054652   \n",
       "9               -0.023223         -0.438466          -0.438466    -0.074541   \n",
       "\n",
       "   pubRec_woe  pubRecBankruptcies_woe  revolBal_woe  revolUtil_woe  \\\n",
       "0   -0.037466               -0.025199     -0.029686       0.014164   \n",
       "1   -0.037466               -0.025199      0.047673      -0.031812   \n",
       "2   -0.037466               -0.025199     -0.005503       0.014164   \n",
       "3   -0.037466               -0.025199      0.031703       0.069930   \n",
       "4   -0.037466               -0.025199     -0.007056      -0.101815   \n",
       "5   -0.037466               -0.025199     -0.005503      -0.101815   \n",
       "6   -0.037466               -0.025199     -0.005503      -0.359315   \n",
       "7    0.173080                0.167573      0.047673       0.096644   \n",
       "8   -0.037466               -0.025199     -0.029686       0.014164   \n",
       "9   -0.037466               -0.025199     -0.007056      -0.101815   \n",
       "\n",
       "   totalAcc_woe  initialListStatus_woe  applicationType_woe  \\\n",
       "0     -0.027039               0.015550            -0.006394   \n",
       "1      0.015964              -0.021991            -0.006394   \n",
       "2     -0.027039               0.015550            -0.006394   \n",
       "3     -0.027039              -0.021991            -0.006394   \n",
       "4     -0.027039               0.015550            -0.006394   \n",
       "5     -0.022366              -0.021991            -0.006394   \n",
       "6     -0.026171               0.015550            -0.006394   \n",
       "7     -0.046589              -0.021991            -0.006394   \n",
       "8     -0.007714              -0.021991            -0.006394   \n",
       "9     -0.022366               0.015550            -0.006394   \n",
       "\n",
       "   earliesCreditLine_woe  title_woe  policyCode_woe    n0_woe    n1_woe  \\\n",
       "0               0.021583   0.120767             0.0 -0.013497 -0.091526   \n",
       "1               0.110939  -0.335386             0.0 -0.349841 -0.349841   \n",
       "2               0.126182   0.120767             0.0 -0.013497  0.005277   \n",
       "3              -0.003766  -0.149019             0.0  0.003692  0.029135   \n",
       "4               0.176897  -0.045973             0.0  0.132020 -0.091526   \n",
       "5              -0.062422  -0.045973             0.0  0.278610 -0.076569   \n",
       "6               0.121154   0.120767             0.0 -0.013497 -0.076569   \n",
       "7              -0.126231   0.120767             0.0 -0.013497  0.029135   \n",
       "8              -0.177769   0.120767             0.0 -0.013497  0.175882   \n",
       "9              -0.129943   0.120767             0.0 -0.013497  0.029135   \n",
       "\n",
       "     n2_woe    n3_woe    n4_woe    n5_woe    n6_woe    n7_woe    n8_woe  \\\n",
       "0 -0.225819 -0.225819 -0.009372 -0.012631  0.024390 -0.101023 -0.005999   \n",
       "1 -0.349841 -0.349841  0.088785 -0.349841 -0.349841 -0.349841 -0.349871   \n",
       "2 -0.154578 -0.154578  0.099030  0.380758  0.066614 -0.101023  0.051932   \n",
       "3  0.044226  0.044226 -0.009372 -0.075491  0.016291 -0.013259  0.007773   \n",
       "4  0.100124  0.100124 -0.028484  0.060540 -0.024741  0.052799  0.008057   \n",
       "5 -0.225819 -0.225819  0.033582  0.222077  0.276938 -0.091279  0.101450   \n",
       "6 -0.154578 -0.154578  0.053024 -0.034298  0.006573  0.052799 -0.010330   \n",
       "7 -0.081708 -0.081708 -0.009372 -0.075491 -0.004990 -0.038488  0.007773   \n",
       "8  0.379094  0.379094  0.053024 -0.001286  0.020473  0.107856 -0.008783   \n",
       "9 -0.010603 -0.010603 -0.009372 -0.045400  0.023241 -0.031142  0.066247   \n",
       "\n",
       "     n9_woe   n10_woe   n11_woe   n12_woe   n13_woe   n14_woe  \n",
       "0 -0.220500 -0.055832 -0.001482  0.016079  0.007389  0.002365  \n",
       "1 -0.349841  0.041934  0.015512 -0.349841 -0.349841 -0.349841  \n",
       "2 -0.148058  0.042338 -0.001482  0.016079  0.007389  0.261988  \n",
       "3  0.051284 -0.002343 -0.001482  0.016079  0.007389 -0.148513  \n",
       "4  0.096393  0.032834 -0.001482  0.016079  0.007389  0.261988  \n",
       "5 -0.220500  0.058839 -0.001482  0.016079  0.007389 -0.290570  \n",
       "6 -0.148058  0.032834 -0.001482  0.016079  0.007389  0.146186  \n",
       "7 -0.075945 -0.035732 -0.001482  0.016079  0.007389  0.002365  \n",
       "8  0.307821  0.056958  0.015512  0.016079  0.007389  0.403527  \n",
       "9 -0.007837 -0.055832 -0.001482  0.016079  0.007389  0.544376  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the encoder from the column list defined in an earlier cell ('colunms_to_woe', sic)\n",
    "# and 'isDefault' (presumably the binary target column; confirm against WoeEncoder's signature).\n",
    "woe_encoder = WoeEncoder(colunms_to_woe, 'isDefault')\n",
    "# Fit on the bucketed training frame, then transform that same frame.\n",
    "woe_encoder.fit(train_df_bucketed)\n",
    "# In this cell's saved output the transformed frame carries an extra '<column>_woe' column per encoded feature.\n",
    "train_df_woe = woe_encoder.transform(train_df_bucketed)\n",
    "# Preview only: restrict to 10 rows before converting to pandas for display.\n",
    "train_df_woe.limit(10).toPandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "a07a0d7d-e0f5-4fb3-a03c-b30ee1b9f76a",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                \r"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>loanAmnt</th>\n",
       "      <th>term</th>\n",
       "      <th>interestRate</th>\n",
       "      <th>installment</th>\n",
       "      <th>grade</th>\n",
       "      <th>subGrade</th>\n",
       "      <th>employmentTitle</th>\n",
       "      <th>employmentLength</th>\n",
       "      <th>homeOwnership</th>\n",
       "      <th>annualIncome</th>\n",
       "      <th>verificationStatus</th>\n",
       "      <th>issueDate</th>\n",
       "      <th>purpose</th>\n",
       "      <th>postCode</th>\n",
       "      <th>regionCode</th>\n",
       "      <th>dti</th>\n",
       "      <th>delinquency_2years</th>\n",
       "      <th>ficoRangeLow</th>\n",
       "      <th>ficoRangeHigh</th>\n",
       "      <th>openAcc</th>\n",
       "      <th>pubRec</th>\n",
       "      <th>pubRecBankruptcies</th>\n",
       "      <th>revolBal</th>\n",
       "      <th>revolUtil</th>\n",
       "      <th>totalAcc</th>\n",
       "      <th>initialListStatus</th>\n",
       "      <th>applicationType</th>\n",
       "      <th>earliesCreditLine</th>\n",
       "      <th>title</th>\n",
       "      <th>policyCode</th>\n",
       "      <th>n0</th>\n",
       "      <th>n1</th>\n",
       "      <th>n2</th>\n",
       "      <th>n3</th>\n",
       "      <th>n4</th>\n",
       "      <th>n5</th>\n",
       "      <th>n6</th>\n",
       "      <th>n7</th>\n",
       "      <th>n8</th>\n",
       "      <th>n9</th>\n",
       "      <th>n10</th>\n",
       "      <th>n11</th>\n",
       "      <th>n12</th>\n",
       "      <th>n13</th>\n",
       "      <th>n14</th>\n",
       "      <th>loanAmnt_woe</th>\n",
       "      <th>term_woe</th>\n",
       "      <th>interestRate_woe</th>\n",
       "      <th>installment_woe</th>\n",
       "      <th>grade_woe</th>\n",
       "      <th>subGrade_woe</th>\n",
       "      <th>employmentTitle_woe</th>\n",
       "      <th>employmentLength_woe</th>\n",
       "      <th>homeOwnership_woe</th>\n",
       "      <th>annualIncome_woe</th>\n",
       "      <th>verificationStatus_woe</th>\n",
       "      <th>issueDate_woe</th>\n",
       "      <th>purpose_woe</th>\n",
       "      <th>postCode_woe</th>\n",
       "      <th>regionCode_woe</th>\n",
       "      <th>dti_woe</th>\n",
       "      <th>delinquency_2years_woe</th>\n",
       "      <th>ficoRangeLow_woe</th>\n",
       "      <th>ficoRangeHigh_woe</th>\n",
       "      <th>openAcc_woe</th>\n",
       "      <th>pubRec_woe</th>\n",
       "      <th>pubRecBankruptcies_woe</th>\n",
       "      <th>revolBal_woe</th>\n",
       "      <th>revolUtil_woe</th>\n",
       "      <th>totalAcc_woe</th>\n",
       "      <th>initialListStatus_woe</th>\n",
       "      <th>applicationType_woe</th>\n",
       "      <th>earliesCreditLine_woe</th>\n",
       "      <th>title_woe</th>\n",
       "      <th>policyCode_woe</th>\n",
       "      <th>n0_woe</th>\n",
       "      <th>n1_woe</th>\n",
       "      <th>n2_woe</th>\n",
       "      <th>n3_woe</th>\n",
       "      <th>n4_woe</th>\n",
       "      <th>n5_woe</th>\n",
       "      <th>n6_woe</th>\n",
       "      <th>n7_woe</th>\n",
       "      <th>n8_woe</th>\n",
       "      <th>n9_woe</th>\n",
       "      <th>n10_woe</th>\n",
       "      <th>n11_woe</th>\n",
       "      <th>n12_woe</th>\n",
       "      <th>n13_woe</th>\n",
       "      <th>n14_woe</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>800000</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>B</td>\n",
       "      <td>B3</td>\n",
       "      <td>4.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2014-07-01</td>\n",
       "      <td>0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>21</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Nov-1974</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.040077</td>\n",
       "      <td>-0.268618</td>\n",
       "      <td>-0.321798</td>\n",
       "      <td>0.066927</td>\n",
       "      <td>-0.485412</td>\n",
       "      <td>-0.518363</td>\n",
       "      <td>0.034044</td>\n",
       "      <td>-0.080023</td>\n",
       "      <td>-0.185441</td>\n",
       "      <td>-0.063052</td>\n",
       "      <td>-0.367220</td>\n",
       "      <td>-0.070409</td>\n",
       "      <td>0.072657</td>\n",
       "      <td>-0.024552</td>\n",
       "      <td>0.096883</td>\n",
       "      <td>-0.253407</td>\n",
       "      <td>0.093865</td>\n",
       "      <td>-0.226416</td>\n",
       "      <td>-0.226416</td>\n",
       "      <td>0.054652</td>\n",
       "      <td>-0.037466</td>\n",
       "      <td>-0.025199</td>\n",
       "      <td>0.031703</td>\n",
       "      <td>-0.101815</td>\n",
       "      <td>-0.027039</td>\n",
       "      <td>0.015550</td>\n",
       "      <td>-0.006394</td>\n",
       "      <td>-0.302332</td>\n",
       "      <td>0.120767</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.132020</td>\n",
       "      <td>0.029135</td>\n",
       "      <td>0.044226</td>\n",
       "      <td>0.044226</td>\n",
       "      <td>0.018189</td>\n",
       "      <td>-0.007806</td>\n",
       "      <td>0.016291</td>\n",
       "      <td>0.159193</td>\n",
       "      <td>-0.008055</td>\n",
       "      <td>0.051284</td>\n",
       "      <td>0.056171</td>\n",
       "      <td>-0.001482</td>\n",
       "      <td>0.016079</td>\n",
       "      <td>0.165441</td>\n",
       "      <td>0.146186</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>800001</td>\n",
       "      <td>7.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>C</td>\n",
       "      <td>C5</td>\n",
       "      <td>6.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2015-07-01</td>\n",
       "      <td>2</td>\n",
       "      <td>5.0</td>\n",
       "      <td>8</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Jul-2001</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.150643</td>\n",
       "      <td>0.652008</td>\n",
       "      <td>0.194969</td>\n",
       "      <td>0.066927</td>\n",
       "      <td>0.152694</td>\n",
       "      <td>0.351410</td>\n",
       "      <td>0.004855</td>\n",
       "      <td>-0.080023</td>\n",
       "      <td>-0.185441</td>\n",
       "      <td>0.089520</td>\n",
       "      <td>-0.367220</td>\n",
       "      <td>-0.000066</td>\n",
       "      <td>-0.156289</td>\n",
       "      <td>-0.021413</td>\n",
       "      <td>-0.027768</td>\n",
       "      <td>0.065022</td>\n",
       "      <td>0.093865</td>\n",
       "      <td>0.282023</td>\n",
       "      <td>0.282023</td>\n",
       "      <td>-0.097486</td>\n",
       "      <td>-0.037466</td>\n",
       "      <td>-0.025199</td>\n",
       "      <td>0.031703</td>\n",
       "      <td>0.069930</td>\n",
       "      <td>0.064885</td>\n",
       "      <td>0.015550</td>\n",
       "      <td>-0.006394</td>\n",
       "      <td>-0.096155</td>\n",
       "      <td>-0.149019</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.104123</td>\n",
       "      <td>-0.076569</td>\n",
       "      <td>-0.154578</td>\n",
       "      <td>-0.154578</td>\n",
       "      <td>0.033582</td>\n",
       "      <td>0.222077</td>\n",
       "      <td>0.006573</td>\n",
       "      <td>-0.087261</td>\n",
       "      <td>0.015904</td>\n",
       "      <td>-0.148058</td>\n",
       "      <td>-0.097875</td>\n",
       "      <td>-0.001482</td>\n",
       "      <td>0.016079</td>\n",
       "      <td>0.152472</td>\n",
       "      <td>0.002365</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>800002</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3</td>\n",
       "      <td>9.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>D</td>\n",
       "      <td>D4</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2 years</td>\n",
       "      <td>1</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2016-10-01</td>\n",
       "      <td>0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>20</td>\n",
       "      <td>9.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>Aug-2006</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.040077</td>\n",
       "      <td>-0.268618</td>\n",
       "      <td>0.980909</td>\n",
       "      <td>0.107892</td>\n",
       "      <td>0.560321</td>\n",
       "      <td>0.648698</td>\n",
       "      <td>0.021600</td>\n",
       "      <td>0.003257</td>\n",
       "      <td>0.192890</td>\n",
       "      <td>0.059751</td>\n",
       "      <td>0.224884</td>\n",
       "      <td>0.142289</td>\n",
       "      <td>0.072657</td>\n",
       "      <td>0.033406</td>\n",
       "      <td>0.215663</td>\n",
       "      <td>0.492354</td>\n",
       "      <td>-0.023223</td>\n",
       "      <td>-0.226416</td>\n",
       "      <td>-0.226416</td>\n",
       "      <td>0.018438</td>\n",
       "      <td>-0.037466</td>\n",
       "      <td>-0.025199</td>\n",
       "      <td>-0.007056</td>\n",
       "      <td>-0.359315</td>\n",
       "      <td>-0.022366</td>\n",
       "      <td>-0.021991</td>\n",
       "      <td>-0.006394</td>\n",
       "      <td>0.019603</td>\n",
       "      <td>0.120767</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.013497</td>\n",
       "      <td>-0.076569</td>\n",
       "      <td>-0.081708</td>\n",
       "      <td>-0.081708</td>\n",
       "      <td>0.033582</td>\n",
       "      <td>0.222077</td>\n",
       "      <td>0.102422</td>\n",
       "      <td>-0.038488</td>\n",
       "      <td>0.031608</td>\n",
       "      <td>-0.075945</td>\n",
       "      <td>0.032834</td>\n",
       "      <td>-0.001482</td>\n",
       "      <td>0.016079</td>\n",
       "      <td>0.007389</td>\n",
       "      <td>0.469499</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>800003</td>\n",
       "      <td>6.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>C</td>\n",
       "      <td>C4</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4 years</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2014-11-01</td>\n",
       "      <td>4</td>\n",
       "      <td>5.0</td>\n",
       "      <td>11</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Jul-2002</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.100969</td>\n",
       "      <td>0.652008</td>\n",
       "      <td>0.194969</td>\n",
       "      <td>0.107892</td>\n",
       "      <td>0.152694</td>\n",
       "      <td>0.291334</td>\n",
       "      <td>-0.035858</td>\n",
       "      <td>-0.007665</td>\n",
       "      <td>-0.185441</td>\n",
       "      <td>0.182694</td>\n",
       "      <td>0.060872</td>\n",
       "      <td>-0.068065</td>\n",
       "      <td>-0.201407</td>\n",
       "      <td>-0.021413</td>\n",
       "      <td>0.074603</td>\n",
       "      <td>-0.173179</td>\n",
       "      <td>-0.023223</td>\n",
       "      <td>0.135115</td>\n",
       "      <td>0.135115</td>\n",
       "      <td>-0.021826</td>\n",
       "      <td>0.173080</td>\n",
       "      <td>0.167573</td>\n",
       "      <td>0.031703</td>\n",
       "      <td>0.069930</td>\n",
       "      <td>0.015964</td>\n",
       "      <td>0.015550</td>\n",
       "      <td>-0.006394</td>\n",
       "      <td>0.009658</td>\n",
       "      <td>-0.149019</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.013497</td>\n",
       "      <td>-0.091526</td>\n",
       "      <td>-0.225819</td>\n",
       "      <td>-0.225819</td>\n",
       "      <td>-0.009372</td>\n",
       "      <td>-0.001286</td>\n",
       "      <td>0.020473</td>\n",
       "      <td>0.028498</td>\n",
       "      <td>0.004929</td>\n",
       "      <td>-0.220500</td>\n",
       "      <td>-0.004251</td>\n",
       "      <td>-0.001482</td>\n",
       "      <td>0.016079</td>\n",
       "      <td>0.007389</td>\n",
       "      <td>0.146186</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>800004</td>\n",
       "      <td>9.0</td>\n",
       "      <td>3</td>\n",
       "      <td>8.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>D</td>\n",
       "      <td>D1</td>\n",
       "      <td>9.0</td>\n",
       "      <td>&lt; 1 year</td>\n",
       "      <td>1</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2017-10-01</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>8</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Dec-2000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.212384</td>\n",
       "      <td>-0.268618</td>\n",
       "      <td>0.613972</td>\n",
       "      <td>0.179946</td>\n",
       "      <td>0.560321</td>\n",
       "      <td>0.434848</td>\n",
       "      <td>-0.144750</td>\n",
       "      <td>0.033287</td>\n",
       "      <td>0.192890</td>\n",
       "      <td>-0.063052</td>\n",
       "      <td>0.060872</td>\n",
       "      <td>0.111884</td>\n",
       "      <td>0.072657</td>\n",
       "      <td>0.013813</td>\n",
       "      <td>-0.027768</td>\n",
       "      <td>0.163994</td>\n",
       "      <td>-0.023223</td>\n",
       "      <td>0.135115</td>\n",
       "      <td>0.135115</td>\n",
       "      <td>0.124319</td>\n",
       "      <td>-0.037466</td>\n",
       "      <td>-0.025199</td>\n",
       "      <td>-0.172406</td>\n",
       "      <td>-0.101815</td>\n",
       "      <td>-0.007714</td>\n",
       "      <td>0.015550</td>\n",
       "      <td>-0.006394</td>\n",
       "      <td>-0.086193</td>\n",
       "      <td>0.120767</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.013497</td>\n",
       "      <td>0.255149</td>\n",
       "      <td>0.311136</td>\n",
       "      <td>0.311136</td>\n",
       "      <td>0.075601</td>\n",
       "      <td>-0.034298</td>\n",
       "      <td>0.006573</td>\n",
       "      <td>0.151697</td>\n",
       "      <td>-0.010330</td>\n",
       "      <td>0.307821</td>\n",
       "      <td>0.058839</td>\n",
       "      <td>-0.001482</td>\n",
       "      <td>0.016079</td>\n",
       "      <td>0.007389</td>\n",
       "      <td>-0.148513</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>800005</td>\n",
       "      <td>6.0</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>A</td>\n",
       "      <td>A1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2017-05-01</td>\n",
       "      <td>0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>8</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Jul-2000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>25.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.100969</td>\n",
       "      <td>-0.268618</td>\n",
       "      <td>-1.585257</td>\n",
       "      <td>0.066927</td>\n",
       "      <td>-1.355566</td>\n",
       "      <td>-2.022757</td>\n",
       "      <td>-0.035858</td>\n",
       "      <td>-0.080023</td>\n",
       "      <td>-0.185441</td>\n",
       "      <td>-0.136416</td>\n",
       "      <td>-0.367220</td>\n",
       "      <td>0.223627</td>\n",
       "      <td>0.072657</td>\n",
       "      <td>0.033406</td>\n",
       "      <td>-0.027768</td>\n",
       "      <td>-0.173179</td>\n",
       "      <td>-0.023223</td>\n",
       "      <td>-0.846917</td>\n",
       "      <td>-0.846917</td>\n",
       "      <td>0.054652</td>\n",
       "      <td>-0.037466</td>\n",
       "      <td>-0.025199</td>\n",
       "      <td>-0.001524</td>\n",
       "      <td>-0.359315</td>\n",
       "      <td>-0.054658</td>\n",
       "      <td>0.015550</td>\n",
       "      <td>-0.006394</td>\n",
       "      <td>-0.055587</td>\n",
       "      <td>0.120767</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.013497</td>\n",
       "      <td>-0.031454</td>\n",
       "      <td>-0.010603</td>\n",
       "      <td>-0.010603</td>\n",
       "      <td>0.055693</td>\n",
       "      <td>-0.046652</td>\n",
       "      <td>0.012414</td>\n",
       "      <td>0.107856</td>\n",
       "      <td>0.008678</td>\n",
       "      <td>-0.007837</td>\n",
       "      <td>0.056171</td>\n",
       "      <td>-0.001482</td>\n",
       "      <td>0.016079</td>\n",
       "      <td>0.007389</td>\n",
       "      <td>0.146186</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>800006</td>\n",
       "      <td>8.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>C</td>\n",
       "      <td>C4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>1</td>\n",
       "      <td>9.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2016-11-01</td>\n",
       "      <td>0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>24</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Jan-1998</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.168001</td>\n",
       "      <td>0.652008</td>\n",
       "      <td>0.194969</td>\n",
       "      <td>0.151984</td>\n",
       "      <td>0.152694</td>\n",
       "      <td>0.291334</td>\n",
       "      <td>0.004517</td>\n",
       "      <td>-0.080023</td>\n",
       "      <td>0.192890</td>\n",
       "      <td>-0.325732</td>\n",
       "      <td>0.224884</td>\n",
       "      <td>0.237508</td>\n",
       "      <td>0.072657</td>\n",
       "      <td>0.004208</td>\n",
       "      <td>0.079937</td>\n",
       "      <td>0.163994</td>\n",
       "      <td>-0.023223</td>\n",
       "      <td>-0.438466</td>\n",
       "      <td>-0.438466</td>\n",
       "      <td>0.028316</td>\n",
       "      <td>-0.037466</td>\n",
       "      <td>-0.025199</td>\n",
       "      <td>-0.029686</td>\n",
       "      <td>-0.031812</td>\n",
       "      <td>-0.026171</td>\n",
       "      <td>0.015550</td>\n",
       "      <td>-0.006394</td>\n",
       "      <td>-0.041107</td>\n",
       "      <td>0.120767</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.013497</td>\n",
       "      <td>0.089790</td>\n",
       "      <td>0.100124</td>\n",
       "      <td>0.100124</td>\n",
       "      <td>0.018189</td>\n",
       "      <td>0.029531</td>\n",
       "      <td>0.012414</td>\n",
       "      <td>0.052799</td>\n",
       "      <td>-0.008783</td>\n",
       "      <td>0.096393</td>\n",
       "      <td>0.043190</td>\n",
       "      <td>-0.001482</td>\n",
       "      <td>0.016079</td>\n",
       "      <td>0.007389</td>\n",
       "      <td>-0.148513</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>800007</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>A</td>\n",
       "      <td>A4</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6 years</td>\n",
       "      <td>0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2017-10-01</td>\n",
       "      <td>4</td>\n",
       "      <td>2.0</td>\n",
       "      <td>15</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Feb-2007</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-0.311320</td>\n",
       "      <td>-0.268618</td>\n",
       "      <td>-1.585257</td>\n",
       "      <td>-0.410185</td>\n",
       "      <td>-1.355566</td>\n",
       "      <td>-1.240844</td>\n",
       "      <td>0.034044</td>\n",
       "      <td>-0.042500</td>\n",
       "      <td>-0.185441</td>\n",
       "      <td>0.089520</td>\n",
       "      <td>-0.367220</td>\n",
       "      <td>0.111884</td>\n",
       "      <td>-0.201407</td>\n",
       "      <td>0.013813</td>\n",
       "      <td>-0.238503</td>\n",
       "      <td>-0.173179</td>\n",
       "      <td>-0.023223</td>\n",
       "      <td>-0.846917</td>\n",
       "      <td>-0.846917</td>\n",
       "      <td>-0.049954</td>\n",
       "      <td>-0.037466</td>\n",
       "      <td>-0.025199</td>\n",
       "      <td>-0.001524</td>\n",
       "      <td>-0.200549</td>\n",
       "      <td>0.035991</td>\n",
       "      <td>-0.021991</td>\n",
       "      <td>0.298157</td>\n",
       "      <td>0.191326</td>\n",
       "      <td>-0.149019</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.013497</td>\n",
       "      <td>-0.031454</td>\n",
       "      <td>-0.154578</td>\n",
       "      <td>-0.154578</td>\n",
       "      <td>-0.009372</td>\n",
       "      <td>0.029531</td>\n",
       "      <td>0.024390</td>\n",
       "      <td>-0.101023</td>\n",
       "      <td>0.063696</td>\n",
       "      <td>-0.148058</td>\n",
       "      <td>-0.035732</td>\n",
       "      <td>-0.001482</td>\n",
       "      <td>0.016079</td>\n",
       "      <td>0.007389</td>\n",
       "      <td>-0.148513</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>800008</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5</td>\n",
       "      <td>7.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>D</td>\n",
       "      <td>D2</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3 years</td>\n",
       "      <td>1</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2015-10-01</td>\n",
       "      <td>0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>May-2004</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.040077</td>\n",
       "      <td>0.652008</td>\n",
       "      <td>0.339893</td>\n",
       "      <td>-0.023396</td>\n",
       "      <td>0.560321</td>\n",
       "      <td>0.530459</td>\n",
       "      <td>-0.035858</td>\n",
       "      <td>0.012338</td>\n",
       "      <td>0.192890</td>\n",
       "      <td>0.059751</td>\n",
       "      <td>0.060872</td>\n",
       "      <td>-0.051211</td>\n",
       "      <td>0.072657</td>\n",
       "      <td>-0.026490</td>\n",
       "      <td>0.054454</td>\n",
       "      <td>0.163994</td>\n",
       "      <td>0.093865</td>\n",
       "      <td>0.135115</td>\n",
       "      <td>0.135115</td>\n",
       "      <td>0.028316</td>\n",
       "      <td>-0.037466</td>\n",
       "      <td>-0.025199</td>\n",
       "      <td>-0.007056</td>\n",
       "      <td>-0.200549</td>\n",
       "      <td>-0.027039</td>\n",
       "      <td>0.015550</td>\n",
       "      <td>-0.006394</td>\n",
       "      <td>0.164217</td>\n",
       "      <td>0.120767</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.132020</td>\n",
       "      <td>0.029135</td>\n",
       "      <td>-0.081708</td>\n",
       "      <td>-0.081708</td>\n",
       "      <td>-0.009372</td>\n",
       "      <td>0.029352</td>\n",
       "      <td>0.066614</td>\n",
       "      <td>-0.038488</td>\n",
       "      <td>0.063696</td>\n",
       "      <td>-0.075945</td>\n",
       "      <td>0.043190</td>\n",
       "      <td>-0.001482</td>\n",
       "      <td>0.016079</td>\n",
       "      <td>0.007389</td>\n",
       "      <td>0.261988</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>800009</td>\n",
       "      <td>6.0</td>\n",
       "      <td>3</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>A</td>\n",
       "      <td>A4</td>\n",
       "      <td>5.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2014-08-01</td>\n",
       "      <td>4</td>\n",
       "      <td>2.0</td>\n",
       "      <td>26</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Sep-1992</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.100969</td>\n",
       "      <td>-0.268618</td>\n",
       "      <td>-0.975234</td>\n",
       "      <td>0.066927</td>\n",
       "      <td>-1.355566</td>\n",
       "      <td>-1.240844</td>\n",
       "      <td>0.021600</td>\n",
       "      <td>-0.080023</td>\n",
       "      <td>-0.185441</td>\n",
       "      <td>-0.325732</td>\n",
       "      <td>-0.367220</td>\n",
       "      <td>-0.097779</td>\n",
       "      <td>-0.201407</td>\n",
       "      <td>0.013813</td>\n",
       "      <td>-0.062811</td>\n",
       "      <td>-0.173179</td>\n",
       "      <td>-0.023223</td>\n",
       "      <td>-0.226416</td>\n",
       "      <td>-0.226416</td>\n",
       "      <td>-0.021826</td>\n",
       "      <td>-0.037466</td>\n",
       "      <td>-0.025199</td>\n",
       "      <td>0.047673</td>\n",
       "      <td>0.140108</td>\n",
       "      <td>-0.027039</td>\n",
       "      <td>0.015550</td>\n",
       "      <td>-0.006394</td>\n",
       "      <td>-0.094883</td>\n",
       "      <td>-0.149019</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.104123</td>\n",
       "      <td>-0.091526</td>\n",
       "      <td>-0.081708</td>\n",
       "      <td>-0.081708</td>\n",
       "      <td>-0.028484</td>\n",
       "      <td>-0.001286</td>\n",
       "      <td>-0.004990</td>\n",
       "      <td>-0.101023</td>\n",
       "      <td>0.011132</td>\n",
       "      <td>-0.075945</td>\n",
       "      <td>-0.004251</td>\n",
       "      <td>-0.001482</td>\n",
       "      <td>0.016079</td>\n",
       "      <td>0.007389</td>\n",
       "      <td>0.146186</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       id  loanAmnt term  interestRate  installment grade subGrade  \\\n",
       "0  800000       5.0    3           3.0          6.0     B       B3   \n",
       "1  800001       7.0    5           6.0          6.0     C       C5   \n",
       "2  800002       5.0    3           9.0          5.0     D       D4   \n",
       "3  800003       6.0    5           6.0          5.0     C       C4   \n",
       "4  800004       9.0    3           8.0          9.0     D       D1   \n",
       "5  800005       6.0    3           0.0          6.0     A       A1   \n",
       "6  800006       8.0    5           6.0          7.0     C       C4   \n",
       "7  800007       0.0    3           0.0          0.0     A       A4   \n",
       "8  800008       5.0    5           7.0          3.0     D       D2   \n",
       "9  800009       6.0    3           1.0          6.0     A       A4   \n",
       "\n",
       "   employmentTitle employmentLength homeOwnership  annualIncome  \\\n",
       "0              4.0        10+ years             0           6.0   \n",
       "1              6.0        10+ years             0           3.0   \n",
       "2              5.0          2 years             1           4.0   \n",
       "3              2.0          4 years             0           1.0   \n",
       "4              9.0         < 1 year             1           6.0   \n",
       "5              2.0        10+ years             0           7.0   \n",
       "6              0.0        10+ years             1           9.0   \n",
       "7              4.0          6 years             0           3.0   \n",
       "8              2.0          3 years             1           4.0   \n",
       "9              5.0        10+ years             0           9.0   \n",
       "\n",
       "  verificationStatus   issueDate purpose  postCode regionCode  dti  \\\n",
       "0                  0  2014-07-01       0       4.0         21  2.0   \n",
       "1                  0  2015-07-01       2       5.0          8  6.0   \n",
       "2                  2  2016-10-01       0       8.0         20  9.0   \n",
       "3                  1  2014-11-01       4       5.0         11  3.0   \n",
       "4                  1  2017-10-01       0       2.0          8  7.0   \n",
       "5                  0  2017-05-01       0       8.0          8  3.0   \n",
       "6                  2  2016-11-01       0       6.0         24  7.0   \n",
       "7                  0  2017-10-01       4       2.0         15  3.0   \n",
       "8                  1  2015-10-01       0       3.0          0  7.0   \n",
       "9                  0  2014-08-01       4       2.0         26  3.0   \n",
       "\n",
       "   delinquency_2years  ficoRangeLow  ficoRangeHigh  openAcc  pubRec  \\\n",
       "0                 2.0           7.0            7.0      8.0     1.0   \n",
       "1                 2.0           2.0            2.0      0.0     1.0   \n",
       "2                 1.0           7.0            7.0      6.0     1.0   \n",
       "3                 1.0           4.0            4.0      4.0     2.0   \n",
       "4                 1.0           4.0            4.0      9.0     1.0   \n",
       "5                 1.0           9.0            9.0      8.0     1.0   \n",
       "6                 1.0           8.0            8.0      7.0     1.0   \n",
       "7                 1.0           9.0            9.0      3.0     1.0   \n",
       "8                 2.0           4.0            4.0      7.0     1.0   \n",
       "9                 1.0           7.0            7.0      4.0     1.0   \n",
       "\n",
       "   pubRecBankruptcies  revolBal  revolUtil  totalAcc initialListStatus  \\\n",
       "0                 1.0       4.0        2.0       6.0                 0   \n",
       "1                 1.0       4.0        5.0       1.0                 0   \n",
       "2                 1.0       0.0        0.0       9.0                 1   \n",
       "3                 2.0       4.0        5.0       3.0                 0   \n",
       "4                 1.0       9.0        2.0       4.0                 0   \n",
       "5                 1.0       2.0        0.0       8.0                 0   \n",
       "6                 1.0       8.0        3.0       5.0                 0   \n",
       "7                 1.0       2.0        1.0       2.0                 1   \n",
       "8                 1.0       0.0        1.0       6.0                 0   \n",
       "9                 1.0       6.0        8.0       6.0                 0   \n",
       "\n",
       "  applicationType earliesCreditLine  title  policyCode   n0   n1    n2    n3  \\\n",
       "0               0          Nov-1974    1.0         1.0  1.0  4.0   6.0   6.0   \n",
       "1               0          Jul-2001    3.0         1.0  2.0  1.0   3.0   3.0   \n",
       "2               0          Aug-2006    1.0         1.0  0.0  1.0   4.0   4.0   \n",
       "3               0          Jul-2002    3.0         1.0  0.0  2.0   2.0   2.0   \n",
       "4               0          Dec-2000    1.0         1.0  0.0  8.0  11.0  11.0   \n",
       "5               0          Jul-2000    1.0         1.0  0.0  3.0   5.0   5.0   \n",
       "6               0          Jan-1998    1.0         1.0  0.0  5.0   7.0   7.0   \n",
       "7               1          Feb-2007    3.0         1.0  0.0  3.0   3.0   3.0   \n",
       "8               0          May-2004    1.0         1.0  1.0  4.0   4.0   4.0   \n",
       "9               0          Sep-1992    3.0         1.0  2.0  2.0   4.0   4.0   \n",
       "\n",
       "    n4    n5    n6    n7    n8    n9   n10  n11  n12  n13  n14  loanAmnt_woe  \\\n",
       "0  6.0   8.0   4.0  15.0  19.0   6.0  17.0  0.0  0.0  1.0  3.0      0.040077   \n",
       "1  1.0   1.0   3.0   3.0   9.0   3.0   5.0  0.0  0.0  2.0  2.0      0.150643   \n",
       "2  1.0   1.0  36.0   5.0   6.0   4.0  12.0  0.0  0.0  0.0  7.0      0.040077   \n",
       "3  4.0   7.0   2.0   8.0  14.0   2.0  10.0  0.0  0.0  0.0  3.0      0.100969   \n",
       "4  9.0  11.0   3.0  16.0  18.0  11.0  19.0  0.0  0.0  0.0  1.0      0.212384   \n",
       "5  8.0  14.0   6.0  13.0  25.0   5.0  17.0  0.0  0.0  0.0  3.0      0.100969   \n",
       "6  6.0   6.0   6.0  10.0  17.0   7.0  14.0  0.0  0.0  0.0  1.0      0.168001   \n",
       "7  4.0   6.0   8.0   4.0   8.0   3.0   8.0  0.0  0.0  0.0  1.0     -0.311320   \n",
       "8  4.0   5.0  21.0   5.0   8.0   4.0  14.0  0.0  0.0  0.0  4.0      0.040077   \n",
       "9  2.0   7.0  10.0   4.0  13.0   4.0  10.0  0.0  0.0  0.0  3.0      0.100969   \n",
       "\n",
       "   term_woe  interestRate_woe  installment_woe  grade_woe  subGrade_woe  \\\n",
       "0 -0.268618         -0.321798         0.066927  -0.485412     -0.518363   \n",
       "1  0.652008          0.194969         0.066927   0.152694      0.351410   \n",
       "2 -0.268618          0.980909         0.107892   0.560321      0.648698   \n",
       "3  0.652008          0.194969         0.107892   0.152694      0.291334   \n",
       "4 -0.268618          0.613972         0.179946   0.560321      0.434848   \n",
       "5 -0.268618         -1.585257         0.066927  -1.355566     -2.022757   \n",
       "6  0.652008          0.194969         0.151984   0.152694      0.291334   \n",
       "7 -0.268618         -1.585257        -0.410185  -1.355566     -1.240844   \n",
       "8  0.652008          0.339893        -0.023396   0.560321      0.530459   \n",
       "9 -0.268618         -0.975234         0.066927  -1.355566     -1.240844   \n",
       "\n",
       "   employmentTitle_woe  employmentLength_woe  homeOwnership_woe  \\\n",
       "0             0.034044             -0.080023          -0.185441   \n",
       "1             0.004855             -0.080023          -0.185441   \n",
       "2             0.021600              0.003257           0.192890   \n",
       "3            -0.035858             -0.007665          -0.185441   \n",
       "4            -0.144750              0.033287           0.192890   \n",
       "5            -0.035858             -0.080023          -0.185441   \n",
       "6             0.004517             -0.080023           0.192890   \n",
       "7             0.034044             -0.042500          -0.185441   \n",
       "8            -0.035858              0.012338           0.192890   \n",
       "9             0.021600             -0.080023          -0.185441   \n",
       "\n",
       "   annualIncome_woe  verificationStatus_woe  issueDate_woe  purpose_woe  \\\n",
       "0         -0.063052               -0.367220      -0.070409     0.072657   \n",
       "1          0.089520               -0.367220      -0.000066    -0.156289   \n",
       "2          0.059751                0.224884       0.142289     0.072657   \n",
       "3          0.182694                0.060872      -0.068065    -0.201407   \n",
       "4         -0.063052                0.060872       0.111884     0.072657   \n",
       "5         -0.136416               -0.367220       0.223627     0.072657   \n",
       "6         -0.325732                0.224884       0.237508     0.072657   \n",
       "7          0.089520               -0.367220       0.111884    -0.201407   \n",
       "8          0.059751                0.060872      -0.051211     0.072657   \n",
       "9         -0.325732               -0.367220      -0.097779    -0.201407   \n",
       "\n",
       "   postCode_woe  regionCode_woe   dti_woe  delinquency_2years_woe  \\\n",
       "0     -0.024552        0.096883 -0.253407                0.093865   \n",
       "1     -0.021413       -0.027768  0.065022                0.093865   \n",
       "2      0.033406        0.215663  0.492354               -0.023223   \n",
       "3     -0.021413        0.074603 -0.173179               -0.023223   \n",
       "4      0.013813       -0.027768  0.163994               -0.023223   \n",
       "5      0.033406       -0.027768 -0.173179               -0.023223   \n",
       "6      0.004208        0.079937  0.163994               -0.023223   \n",
       "7      0.013813       -0.238503 -0.173179               -0.023223   \n",
       "8     -0.026490        0.054454  0.163994                0.093865   \n",
       "9      0.013813       -0.062811 -0.173179               -0.023223   \n",
       "\n",
       "   ficoRangeLow_woe  ficoRangeHigh_woe  openAcc_woe  pubRec_woe  \\\n",
       "0         -0.226416          -0.226416     0.054652   -0.037466   \n",
       "1          0.282023           0.282023    -0.097486   -0.037466   \n",
       "2         -0.226416          -0.226416     0.018438   -0.037466   \n",
       "3          0.135115           0.135115    -0.021826    0.173080   \n",
       "4          0.135115           0.135115     0.124319   -0.037466   \n",
       "5         -0.846917          -0.846917     0.054652   -0.037466   \n",
       "6         -0.438466          -0.438466     0.028316   -0.037466   \n",
       "7         -0.846917          -0.846917    -0.049954   -0.037466   \n",
       "8          0.135115           0.135115     0.028316   -0.037466   \n",
       "9         -0.226416          -0.226416    -0.021826   -0.037466   \n",
       "\n",
       "   pubRecBankruptcies_woe  revolBal_woe  revolUtil_woe  totalAcc_woe  \\\n",
       "0               -0.025199      0.031703      -0.101815     -0.027039   \n",
       "1               -0.025199      0.031703       0.069930      0.064885   \n",
       "2               -0.025199     -0.007056      -0.359315     -0.022366   \n",
       "3                0.167573      0.031703       0.069930      0.015964   \n",
       "4               -0.025199     -0.172406      -0.101815     -0.007714   \n",
       "5               -0.025199     -0.001524      -0.359315     -0.054658   \n",
       "6               -0.025199     -0.029686      -0.031812     -0.026171   \n",
       "7               -0.025199     -0.001524      -0.200549      0.035991   \n",
       "8               -0.025199     -0.007056      -0.200549     -0.027039   \n",
       "9               -0.025199      0.047673       0.140108     -0.027039   \n",
       "\n",
       "   initialListStatus_woe  applicationType_woe  earliesCreditLine_woe  \\\n",
       "0               0.015550            -0.006394              -0.302332   \n",
       "1               0.015550            -0.006394              -0.096155   \n",
       "2              -0.021991            -0.006394               0.019603   \n",
       "3               0.015550            -0.006394               0.009658   \n",
       "4               0.015550            -0.006394              -0.086193   \n",
       "5               0.015550            -0.006394              -0.055587   \n",
       "6               0.015550            -0.006394              -0.041107   \n",
       "7              -0.021991             0.298157               0.191326   \n",
       "8               0.015550            -0.006394               0.164217   \n",
       "9               0.015550            -0.006394              -0.094883   \n",
       "\n",
       "   title_woe  policyCode_woe    n0_woe    n1_woe    n2_woe    n3_woe  \\\n",
       "0   0.120767             0.0  0.132020  0.029135  0.044226  0.044226   \n",
       "1  -0.149019             0.0  0.104123 -0.076569 -0.154578 -0.154578   \n",
       "2   0.120767             0.0 -0.013497 -0.076569 -0.081708 -0.081708   \n",
       "3  -0.149019             0.0 -0.013497 -0.091526 -0.225819 -0.225819   \n",
       "4   0.120767             0.0 -0.013497  0.255149  0.311136  0.311136   \n",
       "5   0.120767             0.0 -0.013497 -0.031454 -0.010603 -0.010603   \n",
       "6   0.120767             0.0 -0.013497  0.089790  0.100124  0.100124   \n",
       "7  -0.149019             0.0 -0.013497 -0.031454 -0.154578 -0.154578   \n",
       "8   0.120767             0.0  0.132020  0.029135 -0.081708 -0.081708   \n",
       "9  -0.149019             0.0  0.104123 -0.091526 -0.081708 -0.081708   \n",
       "\n",
       "     n4_woe    n5_woe    n6_woe    n7_woe    n8_woe    n9_woe   n10_woe  \\\n",
       "0  0.018189 -0.007806  0.016291  0.159193 -0.008055  0.051284  0.056171   \n",
       "1  0.033582  0.222077  0.006573 -0.087261  0.015904 -0.148058 -0.097875   \n",
       "2  0.033582  0.222077  0.102422 -0.038488  0.031608 -0.075945  0.032834   \n",
       "3 -0.009372 -0.001286  0.020473  0.028498  0.004929 -0.220500 -0.004251   \n",
       "4  0.075601 -0.034298  0.006573  0.151697 -0.010330  0.307821  0.058839   \n",
       "5  0.055693 -0.046652  0.012414  0.107856  0.008678 -0.007837  0.056171   \n",
       "6  0.018189  0.029531  0.012414  0.052799 -0.008783  0.096393  0.043190   \n",
       "7 -0.009372  0.029531  0.024390 -0.101023  0.063696 -0.148058 -0.035732   \n",
       "8 -0.009372  0.029352  0.066614 -0.038488  0.063696 -0.075945  0.043190   \n",
       "9 -0.028484 -0.001286 -0.004990 -0.101023  0.011132 -0.075945 -0.004251   \n",
       "\n",
       "    n11_woe   n12_woe   n13_woe   n14_woe  \n",
       "0 -0.001482  0.016079  0.165441  0.146186  \n",
       "1 -0.001482  0.016079  0.152472  0.002365  \n",
       "2 -0.001482  0.016079  0.007389  0.469499  \n",
       "3 -0.001482  0.016079  0.007389  0.146186  \n",
       "4 -0.001482  0.016079  0.007389 -0.148513  \n",
       "5 -0.001482  0.016079  0.007389  0.146186  \n",
       "6 -0.001482  0.016079  0.007389 -0.148513  \n",
       "7 -0.001482  0.016079  0.007389 -0.148513  \n",
       "8 -0.001482  0.016079  0.007389  0.261988  \n",
       "9 -0.001482  0.016079  0.007389  0.146186  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_df_woe = woe_encoder.transform(test_df_bucketed)\n",
    "test_df_woe.limit(10).toPandas()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
